Binary files 0.18.8-1/analysis/distance/closed-syncmers-s15.xxx.fasta.gz.bed.interval.hist.png and 0.19.0-1/analysis/distance/closed-syncmers-s15.xxx.fasta.gz.bed.interval.hist.png differ
Binary files 0.18.8-1/analysis/distance/minimizer-w15.xxx.fasta.gz.bed.interval.hist.png and 0.19.0-1/analysis/distance/minimizer-w15.xxx.fasta.gz.bed.interval.hist.png differ
diff -pruN 0.18.8-1/analysis/distance/README.md 0.19.0-1/analysis/distance/README.md
--- 0.18.8-1/analysis/distance/README.md	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/analysis/distance/README.md	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,76 @@
+# Distances between consecutive k-mers
+
+Number of k-mers sketches:
+    
+    unikmer num -n *.unik
+    
+    446227  closed-syncmers-s15.unik
+    549963  minimizer-w15.unik
+    586734  scaled-minhash-s15.unik
+    
+
+Distribution of distances between consecutive k-mers. Parameters:
+
+- Minimizer: k=31, w=15
+- Closed Syncmers: k=31, s=16 (roughly equal to w=15)
+- Scaled MinHash: k=31, scale=15
+
+<img src="minimizer-w15.xxx.fasta.gz.bed.interval.hist.png" alt="taxonkit" width="420" />
+<img src="closed-syncmers-s15.xxx.fasta.gz.bed.interval.hist.png" alt="taxonkit" width="420" />
+<img src="scaled-minhash-s15.xxx.fasta.gz.bed.interval.hist.png" alt="taxonkit" width="420" />
+
+Steps:
+    
+    # tools:
+    #     seqkit: https://github.com/shenwei356/seqkit
+    #     unikmer: https://github.com/shenwei356/unikmer
+    #     csvtk: https://github.com/shenwei356/csvtk
+    #     rush: https://github.com/shenwei356/rush
+
+    genome=Ecoli-MG1655.fasta.gz
+    
+    
+
+    # -------------------------------
+    # compute
+    
+    # minimizer (w=15)
+    unikmer count -k 31 -K -s $genome -H -W 15 -o minimizer-w15
+
+    # closed-syncmers (s=16)
+    unikmer count -k 31 -K -s $genome -H -S 16 -o closed-syncmers-s15
+    
+    # scaled minhash (scale=15)
+    unikmer count -k 31 -K -s $genome -H -D 15 -o scaled-minhash-s15
+ 
+
+    
+    # -------------------------------
+    # mapping
+    
+    # convert to FASTA
+    ls *.unik \
+        | rush -v ref=$genome \
+            'unikmer view -g {ref} {} \
+                | awk "{print \$1\"\t\"\$1}" \
+                | seqkit tab2fx -o {}.fasta.gz'
+    
+    # mapping
+    ls *.unik.fasta.gz \
+        | rush -v ref=$genome \
+            'seqkit locate -j 5 --use-fmi --bed -f {} {ref} \
+                | sort -k 2,2n -k 6,6 \
+                > {}.bed'
+    
+    
+    # -------------------------------
+    # plotting
+    
+    # compute interval
+    ls *.bed \
+        | rush -q 'perl -ane "$loc=$F[1]; if($pre==0){$pre=$loc and next;} $d=$loc-$pre; print qq($d\n); $pre=$loc;" {} > {}.interval'
+    
+    # plot
+    ls *.interval \
+        | rush 'csvtk plot hist -Ht {} --title {:} -o {}.hist.png'
+        
Binary files 0.18.8-1/analysis/distance/scaled-minhash-s15.xxx.fasta.gz.bed.interval.hist.png and 0.19.0-1/analysis/distance/scaled-minhash-s15.xxx.fasta.gz.bed.interval.hist.png differ
diff -pruN 0.18.8-1/CHANGELOG.md 0.19.0-1/CHANGELOG.md
--- 0.18.8-1/CHANGELOG.md	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/CHANGELOG.md	2022-04-25 12:44:35.000000000 +0000
@@ -1,47 +1,54 @@
 # Changelog
 
-- v0.18.8
+- v0.19.0 - 2022-04-25
+  - rename command `genautocomplete` to `autocompletion`.
+  - remove command `help`.
+  - change default value of option `-j` from `2` to `4`.
+  - `unikmer count/uniqs/locate`: new flag `-B/--seq-name-filter` for filtering out unwanted sequences like plasmids.
+  - `unikmer count`: add support of `.xz` and `.zst` files.
+- v0.18.8 - 2021-09-17
+  - use new version of nthash with better performance.
   - `unikmer info`: fix typoes.
-- v0.18.7
+- v0.18.7 - 2021-08-30
   - `unikmer`: better counting speed by upstream optimization of FASTA/Q parsing.
   - `unikmer concat`: fix parsing flag `-n`.
-- v0.17.3
+- v0.17.3 - 2021-05-16
   - `unikmer`: fix buiding for 386. #21
-- v0.17.2
+- v0.17.2 - 2021-02-05
   - `unikmer`: slightly speedup for computing LCA.
   - `unikmer rfilter:` 
     - flag `-E/--equal-to` supports multiple values.
     - new flag `-n/--save-predictable-norank`: do not discard some special ranks without order when using -L, where rank of the closest higher node is still lower than rank cutoff.
-- v0.17.1
+- v0.17.1 - 2021-01-18
   - `unikmer rfilter:` change handling of black list.
-- v0.17.0
-  - syncmer value changed with different hash method.
+- v0.17.0 - 2021-01-15
+  - **syncmer value changed with different hash method**.
   - `unikmer count`: syncmer value changed.
-- v0.16.1
+- v0.16.1 - 2020-12-28
   - change Header.Number from `int64` to `uint64`
   - `unikmer info`: fix recounting problem for unsorted kmers but with Number.
-- v0.16.0
+- v0.16.0 - 2020-12-28
   - `unikmer`:
-    - binary file format change: fix reading long description, and bump version to `5.0`.
+    - **binary file format change**: fix reading long description, and bump version to `5.0`.
     - better binary file parsing performance.
-- v0.15.0
+- v0.15.0 - 2020-12-25
   - `unikmer`:
     - binary file minor change: increase description maximal length from 128 B to 1KB.
     - separating k-mers (sketches) indexing and searching from `unikmer`, including `unikmer db info/index/search`.
   - `unikmer count`: fix syncmer.
   - `unikmer dump`: new flag `--hashed`.
   - rename `unikmer stats` to `unikmer info`, and add new column `description`.
-- v0.14.0
+- v0.14.0 - 2020-11-25
   - `unikmer union`: fix bug when flag `-s` not given.
   - `unikmer count/uniqs/locate`: performance improvement on generating k-mers.
   - `unikmer count/db`: support scaled/minizimer/syncmer sketch.
   - `unikmer stats`: change format.
-- v0.13.0
+- v0.13.0 - 2020-10-23
   - new command `unikmer common`: Finding k-mers shared by most of multiple binary files.
   - `unikmer common/count/diff/grep/rfilter/sort/split/union`: faster sorting.
   - `unikmer uniqs`: better result for flag `--circular`.
   - `unikmer search`: fix a bug when searching on database with more than one hash.
-- v0.12.0
+- v0.12.0 - 2020-09-24
   - `unikmer`:
     - support longer k (k>32) by saving ntHash.
     - new flag `-nocheck-file` for not checking binary file.
@@ -63,7 +70,7 @@
     - new flag `-W/--seqs-in-a-file-as-one-genome`.
   - `unikmer count`:
     - new flag `-u/--unique` for output unique (single copy) kmers
-- v0.11.0
+- v0.11.0 - 2020-07-06
   - new command: `unikmer rfilter` for filtering k-mers by taxonomic rank.
   - `unikmer inter`: new flag `-m/--mix-taxid` allowing part of files being whithout taxids.
   - `unikmer dump`: fix a nil pointer bug.
@@ -73,23 +80,23 @@
   - `unikmer count/diff/union`: slightly reduce memory and speedup when sorting k-mers.
   - `unikmer filter`: change scoring.
   - `unikmer count/locate/uniqs`: remove flag `--circular`.
-- v0.10.0
+- v0.10.0 - 2020-05-21
   - `unikmer`: fix loading custom taxonomy files.
   - `unikmer count`:
-    - new flag `-d` for only count duplicated k-mers, for removing singleton in FASTQ.
+    - new flag `-d` for only count duplicate k-mers, for removing singleton in FASTQ.
     - fix nil pointer bug of `-t`.
   - `unikmer split`: fix memery and last odd k-mer mising bug for given ONE sorted input file.
   - `unikmer sort`: skip loading taxonomy data when neither `-u` or `-d` given.
   - `unikmer diff`: 2X speedup, and requiring 1th file being sorted.
   - `unikmer inter`: 2-5X speedup, and requiring all files being sorted, sorted output by default.
-- v0.9.0
+- v0.9.0 - 2020-02-18
   - `unikmer`: **new binary format supporting optional Taxids**.
   - deleted command: `unikmer subset`.
   - new command: `unikmer head` for extracting the first N k-mers.
   - new command: `unikmer tsplit` for splitting k-mers according to taxid.
   - `unikmer grep`: support searching with taxids.
   - `unikmer count`: support parsing taxid from FASTA/Q header.
-- v0.8.0
+- v0.8.0 - 2019-02-09
   - `unikmer`:
     - new option `-i/--infile-list`, if given, files in the list file are appended to files from cli arguments.
     - improve performance of binary file reading and writing.
@@ -99,7 +106,7 @@
   - `unikmer grep`: rewrite, support loading queries from .unik files.
   - `unikmer dump`: fix number information in output file.
   - `unikmer concat`: new flag `-s/--sorted`.
-- v0.7.0
+- v0.7.0 - 2019-09-29
   - new command `unikmer filter`: filter low-complexity k-mers.
   - new command `unikmer split`: split k-mers into sorted chunk files.
   - new command `unikmer merge`: merge from sorted chunk files.
@@ -113,61 +120,61 @@
   - `unikmer union`:
     - new option `-d/--repeated` for only printing duplicate k-mers.
   - `unikmer sort`:
-    - new option `-u/--unique` for removing duplicated k-mers.
+    - new option `-u/--unique` for removing duplicate k-mers.
     - new option `-d/--repeated` for only printing duplicate k-mers.
     - new option `-m/--chunk-size` for limiting maximum memory for sorting.
   - `unikmer diff`:
     - small speed improvements.
-- v0.6.2
+- v0.6.2 - 2019-01-21
   - `unikmer encode`: better output for bits presentation of encoded k-mers (`-a/--all`)
-- v0.6.1
+- v0.6.1 - 2019-01-21
   - `unikmer dump`: 
     - new option `-K/--canonical` to keep the canonical k-mers.
     - new option `-k/--canonical-only` to only keep the canonical k-mers.
     - new option `-s/--sorted` to save sorted k-mers.
   - `unikmer encode`: add option `-K/--canonical` to keep the canonical k-mers.
-- v0.6.0
+- v0.6.0 - 2019-01-20
   - `unikmer`: check encoded integer overflow
   - new command `unikmer encode`: encode plain k-mer text to integer
   - new command `unikmer decode`: decode encoded integer to k-mer text
-- v0.5.3
+- v0.5.3 - 2018-11-28
   - `unikmer count/dump`: check file before handling them.
-- v0.5.2
+- v0.5.2 - 2018-11-28
   - `unikmer locate`: fix bug.
   - `unikmer`: doc update.
-- v0.5.1
+- v0.5.1 - 2018-11-07
   - `unikmer locate/uniqs`: fix options checking.
-- v0.5.0
+- v0.5.0 - 2018-11-07
   - `unikmer diff`: fix concurrency bug when cloning kmers from first file.
   - new command `unikmer locate`: locate Kmers in genome.
   - new command `unikmer uniqs`: mapping Kmers back to genome and find unique subsequences.
-- v0.4.4
+- v0.4.4 - 2018-10-27
   - `unikmer`: add global option `-L/--compression-level`.
   - `unikmer diff`: reduce memory occupation, speed not affected.
-- v0.4.3
+- v0.4.3 - 2018-10-13
   - `unikmer diff`: fix bug of hanging when the first file having no Kmers.
-- v0.4.2
+- v0.4.2 - 2018-10-13
   - `unikmer stats/diff`: more intuitional output
-- v0.4.1
+- v0.4.1 - 2018-10-10
   - Better performance of writing and reading binary files 
-- v0.4.0
+- v0.4.0 - 2018-10-09
   - **Binary serialization format changed.**
   - new command `unikmer sort`: sort binary files
   - `unikmer count/diff/union/inter`: better performance, add option to sort Kmers which significantly reduces file size
   - `unikmer dump`: changed option
   - `unikmer count`: changed option
-- v0.3.1
+- v0.3.1 - 2018-09-25
   - **Binary serialization format changed.**
   - new command `unikmer stats`: statistics of binary files.
   - `unikmer`: adding global option `-i/--infile-list` for reading files listed in file.
   - `unikmer diff`: fixed a concurrency bug when no diff found.
-- v0.2.1
+- v0.2.1 - 2018-09-23
   - `unikmer count`: performance improvement and new option `--canonical` for only keeping canonical Kmers.
-- v0.2
+- v0.2 - 2018-09-09
   - new command `unikmer sample`: sample Kmers from binary files.
   - new global options:
     - `-c, --compact`:     write more compact binary file with little loss of speed.
     - `-C, --no-compress`:   do not compress binary file (not recommended).
   - some improvements.
-- v0.1.0
+- v0.1.0 - 2018-08-09
   - first release
diff -pruN 0.18.8-1/debian/changelog 0.19.0-1/debian/changelog
--- 0.18.8-1/debian/changelog	2021-09-26 12:58:05.000000000 +0000
+++ 0.19.0-1/debian/changelog	2022-08-05 03:49:35.000000000 +0000
@@ -1,3 +1,12 @@
+unikmer (0.19.0-1) unstable; urgency=medium
+
+  * New upstream release.
+  * Update manpage
+  * Add B-D on golang-github-shenwei356-unik.v5-dev
+    and golang-github-shenwei356-kmers-dev
+
+ -- Nilesh Patra <nilesh@debian.org>  Fri, 05 Aug 2022 09:19:35 +0530
+
 unikmer (0.18.8-1) unstable; urgency=medium
 
   * New upstream version
diff -pruN 0.18.8-1/debian/control 0.19.0-1/debian/control
--- 0.18.8-1/debian/control	2021-09-26 12:58:05.000000000 +0000
+++ 0.19.0-1/debian/control	2022-08-05 03:49:05.000000000 +0000
@@ -14,6 +14,8 @@ Build-Depends: debhelper-compat (= 13),
                golang-github-pkg-errors-dev,
                golang-github-shenwei356-bio-dev (>= 0.3.1~),
                golang-github-shenwei356-breader-dev (>= 0.3.1~),
+               golang-github-shenwei356-kmers-dev,
+               golang-github-shenwei356-unik.v5-dev,
                golang-github-shenwei356-util-dev (>= 0.3.1~),
                golang-github-shenwei356-xopen-dev,
                golang-github-spf13-cobra-dev,
@@ -22,7 +24,7 @@ Build-Depends: debhelper-compat (= 13),
                golang-github-twotwotwo-sorts-dev,
                golang-github-will-rowe-nthash-dev (>= 0.4.0~),
                golang-github-zeebo-wyhash-dev
-Standards-Version: 4.6.0
+Standards-Version: 4.6.1
 Vcs-Browser: https://salsa.debian.org/med-team/unikmer
 Vcs-Git: https://salsa.debian.org/med-team/unikmer.git
 Homepage: https://github.com/shenwei356/unikmer
diff -pruN 0.18.8-1/debian/unikmer.1 0.19.0-1/debian/unikmer.1
--- 0.18.8-1/debian/unikmer.1	2021-08-25 21:08:33.000000000 +0000
+++ 0.19.0-1/debian/unikmer.1	2022-08-05 03:49:35.000000000 +0000
@@ -1,9 +1,9 @@
-.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.47.16.
-.TH UNIKMER "1" "August 2021" "unikmer 0.18.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.48.5.
+.TH UNIKMER "1" "August 2022" "unikmer 0.19.0" "User Commands"
 .SH NAME
 unikmer \- Toolkit for nucleic acid k-mer analysis
 .SH DESCRIPTION
-unikmer \- Unique\-Kmer Toolkit
+unikmer \- Toolkit for k\-mer with taxonomic information
 .PP
 unikmer is a toolkit for nucleic acid k\-mer analysis, providing functions
 including set operation on k\-mers optional with TaxIds but without count
@@ -17,11 +17,11 @@ and LCA (Lowest Common Ancestor) is comp
 including computing union, intersection, set difference, unique and
 repeated k\-mers.
 .PP
-Version: v0.17.2
+Version: v0.19.0
 .PP
 Author: Wei Shen <shenwei356@gmail.com>
 .PP
-Documents  : https://shenwei356.github.io/unikmer
+Documents  : https://bioinf.shenwei.me/unikmer
 Source code: https://github.com/shenwei356/unikmer
 .PP
 Dataset (optional):
@@ -33,58 +33,43 @@ ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxd
 or some other directory, and later you can refer to using flag
 \fB\-\-data\-dir\fR or environment variable UNIKMER_DB.
 .IP
-For GTDB, use https://github.com/nick\-youngblut/gtdb_to_taxdump
-for taxonomy conversion.
+For GTDB, use 'taxonkit create\-taxdump' to create NCBI\-style
+taxonomy dump files, or download from:
+.IP
+https://github.com/shenwei356/gtdb\-taxonomy
 .IP
 Note that TaxIds are represented using uint32 and stored in 4 or
-less bytes, all TaxIds should be in range of [1, 4294967295]
+less bytes, all TaxIds should be in the range of [1, 4294967295]
 .SS "Usage:"
 .IP
 unikmer [command]
 .SS "Available Commands:"
-.TP
-common
-Find k\-mers shared by most of multiple binary files
-.TP
-concat
-Concatenate multiple binary files without removing duplicates
-.TP
-count
-Generate k\-mers (sketch) from FASTA/Q sequences
-.TP
-decode
-Decode encoded integer to k\-mer text
-.TP
-diff
-Set difference of multiple binary files
-.TP
-dump
-Convert plain k\-mer text to binary format
-.TP
-encode
-Encode plain k\-mer text to integer
-.TP
-filter
-Filter low\-complexity k\-mers (experimental)
-.IP
-genautocomplete generate shell autocompletion script (bash|zsh|fish|powershell)
-grep            Search k\-mers from binary files
-head            Extract the first N k\-mers
-help            Help about any command
-info            Information of binary files
-inter           Intersection of multiple binary files
-locate          Locate k\-mers in genome
-merge           Merge k\-mers from sorted chunk files
-num             Quickly inspect number of k\-mers in binary files
-rfilter         Filter k\-mers by taxonomic rank
-sample          Sample k\-mers from binary files
-sort            Sort k\-mers in binary files to reduce file size
-split           Split k\-mers into sorted chunk files
-tsplit          Split k\-mers according to taxid
-union           Union of multiple binary files
-uniqs           Mapping k\-mers back to genome and find unique subsequences
-version         Print version information and check for update
-view            Read and output binary format to plain text
+.IP
+autocompletion Generate shell autocompletion script (bash|zsh|fish|powershell)
+common         Find k\-mers shared by most of multiple binary files
+concat         Concatenate multiple binary files without removing duplicates
+count          Generate k\-mers (sketch) from FASTA/Q sequences
+decode         Decode encoded integer to k\-mer text
+diff           Set difference of multiple binary files
+dump           Convert plain k\-mer text to binary format
+encode         Encode plain k\-mer text to integer
+filter         Filter out low\-complexity k\-mers (experimental)
+grep           Search k\-mers from binary files
+head           Extract the first N k\-mers
+info           Information of binary files
+inter          Intersection of multiple binary files
+locate         Locate k\-mers in genome
+merge          Merge k\-mers from sorted chunk files
+num            Quickly inspect number of k\-mers in binary files
+rfilter        Filter k\-mers by taxonomic rank
+sample         Sample k\-mers from binary files
+sort           Sort k\-mers in binary files to reduce file size
+split          Split k\-mers into sorted chunk files
+tsplit         Split k\-mers according to taxid
+union          Union of multiple binary files
+uniqs          Mapping k\-mers back to genome and find unique subsequences
+version        Print version information and check for update
+view           Read and output binary format to plain text
 .SS "Flags:"
 .TP
 \fB\-c\fR, \fB\-\-compact\fR
@@ -94,7 +79,8 @@ write compact binary file with little lo
 compression level (default \fB\-1\fR)
 .TP
 \fB\-\-data\-dir\fR string
-directory containing NCBI Taxonomy files, including nodes.dmp, names.dmp, merged.dmp and delnodes.dmp (default "/home/nilesh/.unikmer")
+directory containing NCBI Taxonomy files, including nodes.dmp,
+names.dmp, merged.dmp and delnodes.dmp (default "/home/nilesh/.unikmer")
 .TP
 \fB\-h\fR, \fB\-\-help\fR
 help for unikmer
@@ -103,24 +89,23 @@ help for unikmer
 ignore taxonomy information
 .TP
 \fB\-i\fR, \fB\-\-infile\-list\fR string
-file of input files list (one file per line), if given, they are appended to files from cli arguments
+file of input files list (one file per line), if given, they are
+appended to files from cli arguments
 .TP
 \fB\-\-max\-taxid\fR uint32
-for smaller TaxIds, we can use less space to store TaxIds. default value is 1<<32\-1, that's enough for NCBI Taxonomy TaxIds (default 4294967295)
+for smaller TaxIds, we can use less space to store TaxIds. default value
+is 1<<32\-1, that's enough for NCBI Taxonomy TaxIds (default 4294967295)
 .TP
 \fB\-C\fR, \fB\-\-no\-compress\fR
 do not compress binary file (not recommended)
 .TP
 \fB\-\-nocheck\-file\fR
-do not check binary file, when using process substitution/named pipe
+do not check binary file, when using process substitution or named pipe
 .TP
 \fB\-j\fR, \fB\-\-threads\fR int
-number of CPUs to use. (default value: 1 for single\-CPU PC, 2 for others) (default 2)
+number of CPUs to use (default 4)
 .TP
 \fB\-\-verbose\fR
 print verbose information
 .PP
 Use "unikmer [command] \fB\-\-help\fR" for more information about a command.
-.SH AUTHOR
- This manpage was written by Nilesh Patra for the Debian distribution and
- can be used for any other usage of the program.
diff -pruN 0.18.8-1/docs/commands.tsv 0.19.0-1/docs/commands.tsv
--- 0.18.8-1/docs/commands.tsv	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/docs/commands.tsv	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,24 @@
+Category	Command	Function	Input	In.sorted	In.flag-consistency	Output	Out.sorted	Out.unique
+Counting	count	Generate k-mers (sketch) from FASTA/Q sequences	fastx	/	/	.unik	optional	optional
+Information	info	Information of binary files	.unik	optional	no need	tsv	/	/
+	num	Quickly inspect number of k-mers in binary files	.unik	optional	no need	tsv	/	/
+Format conversion	view	Read and output binary format to plain text	.unik	optional	required	tsv	/	/
+	dump	Convert plain k-mer text to binary format	tsv	optional	/	.unik	optional	follow input
+	encode	Encode plain k-mer text to integer	tsv	/	/	tsv	/	/
+	decode	Decode encoded integer to k-mer text	tsv	/	/	tsv	/	/
+Set operations	concat	Concatenate multiple binary files without removing duplicates	.unik	optional	required	.unik	optional	no
+	inter	Intersection of multiple binary files	.unik	required	required	.unik	yes	yes
+	common	Find k-mers shared by most of multiple binary files	.unik	required	required	.unik	yes	yes
+	union	Union of multiple binary files	.unik	optional	required	.unik	optional	yes
+	diff	Set difference of multiple binary files	.unik	1th file required	required	.unik	optional	yes
+Split and merge	sort	Sort k-mers in binary files to reduce file size	.unik	optional	required	.unik	yes	optional
+	split	Split k-mers into sorted chunk files	.unik	optional	required	.unik	yes	optional
+	tsplit	Split k-mers according to TaxId	.unik	required	required	.unik	yes	yes
+	merge	Merge k-mers from sorted chunk files	.unik	required	required	.unik	yes	optional
+Subset	head	Extract the first N k-mers	.unik	optional	required	.unik	follow input	follow input
+	sample	Sample k-mers from binary files	.unik	optional	required	.unik	follow input	follow input
+	grep	Search k-mers from binary files	.unik	optional	required	.unik	follow input	optional
+	filter	Filter out low-complexity k-mers	.unik	optional	required	.unik	follow input	follow input
+	rfilter	Filter k-mers by taxonomic rank	.unik	optional	required	.unik	follow input	follow input
+Searching on genomes	locate	Locate k-mers in genome	.unik, fasta	optional	required	tsv	/	/
+	uniqs	Mapping k-mers back to genome and find unique subsequences	.unik, fasta	optional	required	bed/fasta	/	/
diff -pruN 0.18.8-1/docs/download.md 0.19.0-1/docs/download.md
--- 0.18.8-1/docs/download.md	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/docs/download.md	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,323 @@
+# Download
+
+unikmer is implemented in [Go](https://golang.org/) programming language,
+statically-linked executable binary files are [freely available](https://github.com/shenwei356/unikmer/releases).
+
+## Current Version
+
+### [v0.19.0](https://github.com/shenwei356/unikmer/releases/tag/v0.19.0) - 2022-04-25 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.19.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.19.0)
+
+### Links
+
+OS     |Arch      |File, 中国镜像                                                                                                                                                                                  |Download Count
+:------|:---------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+Linux  |**64-bit**|[**unikmer_linux_amd64.tar.gz**](https://github.com/shenwei356/unikmer/releases/download/v0.19.0/unikmer_linux_amd64.tar.gz), <br/> [中国镜像](http://app.shenwei.me/data/unikmer/unikmer_linux_amd64.tar.gz)                  |[![Github Releases (by Asset)](https://img.shields.io/github/downloads/shenwei356/unikmer/latest/unikmer_linux_amd64.tar.gz.svg?maxAge=3600)](https://github.com/shenwei356/unikmer/releases/download/v0.19.0/unikmer_linux_amd64.tar.gz)
+macOS  |**64-bit**|[**unikmer_darwin_amd64.tar.gz**](https://github.com/shenwei356/unikmer/releases/download/v0.19.0/unikmer_darwin_amd64.tar.gz), <br/> [中国镜像](http://app.shenwei.me/data/unikmer/unikmer_darwin_amd64.tar.gz)               |[![Github Releases (by Asset)](https://img.shields.io/github/downloads/shenwei356/unikmer/latest/unikmer_darwin_amd64.tar.gz.svg?maxAge=3600)](https://github.com/shenwei356/unikmer/releases/download/v0.19.0/unikmer_darwin_amd64.tar.gz)
+Windows|**64-bit**|[**unikmer_windows_amd64.exe.tar.gz**](https://github.com/shenwei356/unikmer/releases/download/v0.19.0/unikmer_windows_amd64.exe.tar.gz), <br/> [中国镜像](http://app.shenwei.me/data/unikmer/unikmer_windows_amd64.exe.tar.gz)|[![Github Releases (by Asset)](https://img.shields.io/github/downloads/shenwei356/unikmer/latest/unikmer_windows_amd64.exe.tar.gz.svg?maxAge=3600)](https://github.com/shenwei356/unikmer/releases/download/v0.19.0/unikmer_windows_amd64.exe.tar.gz)
+
+*Notes:*
+
+- please open an issue to request binaries for other platforms.
+- run `unikmer version` to check update !!!
+- run `unikmer autocompletion` to update shell autocompletion script !!!
+
+
+## Installation 
+
+#### Method 1: Install using conda [![Anaconda Cloud](https://anaconda.org/bioconda/unikmer/badges/version.svg)](https://anaconda.org/bioconda/unikmer) [![downloads](https://anaconda.org/bioconda/unikmer/badges/downloads.svg)](https://anaconda.org/bioconda/unikmer)
+
+    conda install -c bioconda unikmer
+
+#### Method 2: Download binaries
+
+[Download](https://github.com/shenwei356/unikmer/releases) the compressed
+executable file of your operating system,
+and decompress it with `tar -zxvf *.tar.gz` command or other tools.
+And then:
+
+- **For Linux-like systems**
+    - If you have root privilege, simply copy it to `/usr/local/bin`:
+
+            sudo cp unikmer /usr/local/bin/
+
+    - Or copy to anywhere in the environment variable `PATH`:
+
+            mkdir -p $HOME/bin/; cp unikmer $HOME/bin/
+
+- **For Windows**, just copy `unikmer.exe` to `C:\WINDOWS\system32`.
+
+
+## Shell-completion
+
+Supported shell: bash|zsh|fish|powershell
+
+Bash:
+
+    # generate completion shell
+    unikmer autocompletion --shell bash
+
+    # configure if never did.
+    # install bash-completion if the "complete" command is not found.
+    echo "for bcfile in ~/.bash_completion.d/* ; do source \$bcfile; done" >> ~/.bash_completion
+    echo "source ~/.bash_completion" >> ~/.bashrc
+
+Zsh:
+
+    # generate completion shell
+    unikmer autocompletion --shell zsh --file ~/.zfunc/_unikmer
+
+    # configure if never did
+    echo 'fpath=( ~/.zfunc "${fpath[@]}" )' >> ~/.zshrc
+    echo "autoload -U compinit; compinit" >> ~/.zshrc
+
+fish:
+
+    unikmer autocompletion --shell fish --file ~/.config/fish/completions/unikmer.fish
+
+## Release History
+
+### [v0.19.0](https://github.com/shenwei356/unikmer/releases/tag/v0.19.0) - 2022-04-25 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.19.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.19.0)
+
+- rename command `genautocomplete` to `autocompletion`.
+- remove command `help`.
+- change default value of option `-j` from `2` to `4`.
+- `unikmer count/uniqs/locate`: new flag `-B/--seq-name-filter` for filtering out unwanted sequences like plasmids.
+- `unikmer count`: add support of `.xz` and `.zst` files.
+
+### [v0.18.8](https://github.com/shenwei356/unikmer/releases/tag/v0.18.8) - 2021-09-17
+
+- use new version of nthash with better performance.
+- `unikmer info`: fix typos.
+
+### [v0.18.7](https://github.com/shenwei356/unikmer/releases/tag/v0.18.7) - 2021-08-30 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.18.7/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.18.7)
+
+- `unikmer`: better counting speed by upstream optimization of FASTA/Q parsing.
+- `unikmer concat`: fix parsing flag `-n`.
+  
+### [v0.17.3](https://github.com/shenwei356/unikmer/releases/tag/v0.17.3) - 2021-05-16
+
+- `unikmer`: fix building for 386. #21
+
+### [v0.17.2](https://github.com/shenwei356/unikmer/releases/tag/v0.17.2) - 2021-02-05 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.17.2/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.17.2)
+
+- `unikmer`: slightly speedup for computing LCA.
+- `unikmer rfilter:` 
+    - flag `-E/--equal-to` supports multiple values.
+    - new flag `-n/--save-predictable-norank`: do not discard some special ranks without order when using -L, where rank of the closest higher node is still lower than rank cutoff.
+
+### [v0.17.1](https://github.com/shenwei356/unikmer/releases/tag/v0.17.1) - 2021-01-18
+
+- `unikmer rfilter:` change handling of black list.
+
+### [v0.17.0](https://github.com/shenwei356/unikmer/releases/tag/v0.17.0) - 2021-01-15
+
+- **syncmer value changed with different hash method**.
+- `unikmer count`: syncmer value changed.
+
+### [v0.16.1](https://github.com/shenwei356/unikmer/releases/tag/v0.16.1) - 2020-12-28 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.16.1/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.16.1)
+
+- change Header.Number from `int64` to `uint64`
+- `unikmer info`: fix recounting problem for unsorted kmers but with Number.
+  
+### [v0.16.0](https://github.com/shenwei356/unikmer/releases/tag/v0.16.0) - 2020-12-28
+
+- `unikmer`:
+    - **binary file format change**: fix reading long description, and bump version to `5.0`.
+    - better binary file parsing performance.
+
+### [v0.15.0](https://github.com/shenwei356/unikmer/releases/tag/v0.15.0) - 2020-12-25 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.15.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.15.0)
+
+- `unikmer`:
+    - binary file minor change: increase description maximal length from 128 B to 1KB.
+    - separating k-mers (sketches) indexing and searching from `unikmer`, including `unikmer db info/index/search`.
+- `unikmer count`: fix syncmer.
+- `unikmer dump`: new flag `--hashed`.
+- rename `unikmer stats` to `unikmer info`, and add new column `description`.
+  
+### [v0.14.0](https://github.com/shenwei356/unikmer/releases/tag/v0.14.0) - 2020-11-25 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.14.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.14.0)
+
+- `unikmer union`: fix bug when flag `-s` not given.
+- `unikmer count/uniqs/locate`: performance improvement on generating k-mers.
+- `unikmer count/db`: support scaled/minimizer/syncmer sketch.
+- `unikmer stats`: change format.
+  
+### [v0.13.0](https://github.com/shenwei356/unikmer/releases/tag/v0.13.0) - 2020-10-23 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.13.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.13.0)
+
+- new command `unikmer common`: Finding k-mers shared by most of multiple binary files.
+- `unikmer common/count/diff/grep/rfilter/sort/split/union`: faster sorting.
+- `unikmer uniqs`: better result for flag `--circular`.
+- `unikmer search`: fix a bug when searching on database with more than one hash.
+  
+### [v0.12.0](https://github.com/shenwei356/unikmer/releases/tag/v0.12.0) - 2020-09-24 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.12.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.12.0)
+
+- `unikmer`:
+    - support longer k (k>32) by saving ntHash.
+    - new flag `-nocheck-file` for not checking binary file.
+- new commands:
+    - `unikmer db index`: constructing index from binary files
+    - `unikmer db info`: printing information of index file
+    - `unikmer db search`: searching sequence from index database
+- `unikmer rfilter`: change format of rank order file.
+- `unikmer inter/union`: speedup for single input file.
+- `unikmer concat`:
+    - new flag `-t/--taxid` for assigning global taxid, this can slightly reduce file size.
+    - new flag `-n/--number` for setting number of k-mers.
+- `unikmer num`:
+    - new flag `-f/--force` for counting k-mers.
+- `unikmer locate`: output in BED6.
+- `unikmer locate/uniqs`: support multiple genome files.
+- `unikmer uniqs`:
+    - stricter multiple mapping limit.
+    - new flag `-W/--seqs-in-a-file-as-one-genome`.
+- `unikmer count`:
+    - new flag `-u/--unique` for output unique (single copy) kmers
+
+### [v0.11.0](https://github.com/shenwei356/unikmer/releases/tag/v0.11.0) - 2020-07-06 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.11.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.11.0)
+
+- new command: `unikmer rfilter` for filtering k-mers by taxonomic rank.
+- `unikmer inter`: new flag `-m/--mix-taxid` allowing part of files being without taxids.
+- `unikmer dump`: fix a nil pointer bug.
+- `unikmer count`:
+    - fix checking taxid in sequence header.
+    - fix setting global taxid.
+- `unikmer count/diff/union`: slightly reduce memory and speedup when sorting k-mers.
+- `unikmer filter`: change scoring.
+- `unikmer count/locate/uniqs`: remove flag `--circular`.
+
+### [v0.10.0](https://github.com/shenwei356/unikmer/releases/tag/v0.10.0) - 2020-05-21 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.10.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.10.0)
+
+- `unikmer`: fix loading custom taxonomy files.
+- `unikmer count`:
+    - new flag `-d` for only count duplicate k-mers, for removing singleton in FASTQ.
+    - fix nil pointer bug of `-t`.
+- `unikmer split`: fix memory and last odd k-mer missing bug for given ONE sorted input file.
+- `unikmer sort`: skip loading taxonomy data when neither `-u` or `-d` given.
+- `unikmer diff`: 2X speedup, and requiring 1th file being sorted.
+- `unikmer inter`: 2-5X speedup, and requiring all files being sorted, sorted output by default.
+
+### [v0.9.0](https://github.com/shenwei356/unikmer/releases/tag/v0.9.0) - 2020-02-18 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.9.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.9.0)
+ 
+- `unikmer`: **new binary format supporting optional Taxids**.
+- deleted command: `unikmer subset`.
+- new command: `unikmer head` for extracting the first N k-mers.
+- new command: `unikmer tsplit` for splitting k-mers according to taxid.
+- `unikmer grep`: support searching with taxids.
+- `unikmer count`: support parsing taxid from FASTA/Q header.
+  
+### [v0.8.0](https://github.com/shenwei356/unikmer/releases/tag/v0.8.0) - 2019-02-09 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.8.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.8.0)
+
+- `unikmer`:
+    - new option `-i/--infile-list`, if given, files in the list file are appended to files from cli arguments.
+    - improve performance of binary file reading and writing.
+- `unikmer sort/split/merge`: safer forcing deletion of existed outdir, and better log.
+- `unikmer split`: performance improvement for single sorted input file.
+- `unikmer sort`: performance improvement for using `-m/--chunk-size`.
+- `unikmer grep`: rewrite, support loading queries from .unik files.
+- `unikmer dump`: fix number information in output file.
+- `unikmer concat`: new flag `-s/--sorted`.
+  
+### [v0.7.0](https://github.com/shenwei356/unikmer/releases/tag/v0.7.0) - 2019-09-29 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.7.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.7.0)
+
+- new command `unikmer filter`: filter low-complexity k-mers.
+- new command `unikmer split`: split k-mers into sorted chunk files.
+- new command `unikmer merge`: merge from sorted chunk files.
+- `unikmer view`:
+    - new option `-N/--show-code-only` for only showing encoded integers.
+    - fix output error for `-q/--fastq`.
+- `unikmer uniqs`:
+    - new option `-x/--max-cont-non-uniq-kmers` for limiting max continuous non-unique k-mers.
+    - new option `-X/--max-num-cont-non-uniq-kmers` for limiting max number of continuous non-unique k-mers.
+    - fix bug for `-m/--min-len`.
+- `unikmer union`:
+    - new option `-d/--repeated` for only printing duplicate k-mers.
+- `unikmer sort`:
+    - new option `-u/--unique` for removing duplicate k-mers.
+    - new option `-d/--repeated` for only printing duplicate k-mers.
+    - new option `-m/--chunk-size` for limiting maximum memory for sorting.
+- `unikmer diff`:
+    - small speed improvements.
+
+### [v0.6.2](https://github.com/shenwei356/unikmer/releases/tag/v0.6.2) - 2019-01-21 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.6.2/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.6.2)
+
+- `unikmer encode`: better output for bits presentation of encoded k-mers (`-a/--all`)
+
+### [v0.6.1](https://github.com/shenwei356/unikmer/releases/tag/v0.6.1) - 2019-01-21 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.6.1/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.6.1)
+
+- `unikmer dump`: 
+    - new option `-K/--canonical` to keep the canonical k-mers.
+    - new option `-k/--canonical-only` to only keep the canonical k-mers.
+    - new option `-s/--sorted` to save sorted k-mers.
+- `unikmer encode`: add option `-K/--canonical` to keep the canonical k-mers.
+  
+### [v0.6.0](https://github.com/shenwei356/unikmer/releases/tag/v0.6.0) - 2019-01-20 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.6.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.6.0)
+
+- `unikmer`: check encoded integer overflow
+- new command `unikmer encode`: encode plain k-mer text to integer
+- new command `unikmer decode`: decode encoded integer to k-mer text
+  
+### [v0.5.3](https://github.com/shenwei356/unikmer/releases/tag/v0.5.3) - 2018-11-28
+
+- `unikmer count/dump`: check file before handling them.
+
+### [v0.5.2](https://github.com/shenwei356/unikmer/releases/tag/v0.5.2) - 2018-11-28 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.5.2/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.5.2)
+
+- `unikmer locate`: fix bug.
+- `unikmer`: doc update.
+
+### [v0.5.1](https://github.com/shenwei356/unikmer/releases/tag/v0.5.1) - 2018-11-07 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.5.1/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.5.1)
+
+- `unikmer locate/uniqs`: fix options checking.
+
+### [v0.5.0](https://github.com/shenwei356/unikmer/releases/tag/v0.5.0) - 2018-11-07
+
+- `unikmer diff`: fix concurrency bug when cloning kmers from first file.
+- new command `unikmer locate`: locate Kmers in genome.
+- new command `unikmer uniqs`: mapping Kmers back to genome and find unique subsequences.
+  
+### [v0.4.4](https://github.com/shenwei356/unikmer/releases/tag/v0.4.4) - 2018-10-27 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.4.4/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.4.4)
+
+- `unikmer`: add global option `-L/--compression-level`.
+- `unikmer diff`: reduce memory occupation, speed not affected.
+
+### [v0.4.3](https://github.com/shenwei356/unikmer/releases/tag/v0.4.3) - 2018-10-13 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.4.3/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.4.3)
+
+- `unikmer diff`: fix bug of hanging when the first file having no Kmers.
+
+### [v0.4.2](https://github.com/shenwei356/unikmer/releases/tag/v0.4.2) - 2018-10-13 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.4.2/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.4.2)
+
+- `unikmer stats/diff`: more intuitional output
+
+### [v0.4.1](https://github.com/shenwei356/unikmer/releases/tag/v0.4.1) - 2018-10-10 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.4.1/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.4.1)
+
+- Better performance of writing and reading binary files 
+
+### [v0.4.0](https://github.com/shenwei356/unikmer/releases/tag/v0.4.0) - 2018-10-09 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.4.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.4.0)
+
+- **Binary serialization format changed.**
+- new command `unikmer sort`: sort binary files
+- `unikmer count/diff/union/inter`: better performance, add option to sort Kmers which significantly reduces file size
+- `unikmer dump`: changed option
+- `unikmer count`: changed option
+
+### [v0.3.1](https://github.com/shenwei356/unikmer/releases/tag/v0.3.1) - 2018-09-25 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.3.1/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.3.1)
+
+- **Binary serialization format changed.**
+- new command `unikmer stats`: statistics of binary files.
+- `unikmer`: adding global option `-i/--infile-list` for reading files listed in file.
+- `unikmer diff`: fixed a concurrency bug when no diff found.
+
+### [v0.2.1](https://github.com/shenwei356/unikmer/releases/tag/v0.2.1) - 2018-09-23 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.2.1/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.2.1)
+
+- `unikmer count`: performance improvement and new option `--canonical` for only keeping canonical Kmers.
+
+### [v0.2.0](https://github.com/shenwei356/unikmer/releases/tag/v0.2) - 2018-09-09 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.2/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.2)
+
+- new command `unikmer sample`: sample Kmers from binary files.
+- new global options:
+- `-c, --compact`:     write more compact binary file with little loss of speed.
+- `-C, --no-compress`:   do not compress binary file (not recommended).
+- some improvements.
+
+### [v0.1.0](https://github.com/shenwei356/unikmer/releases/tag/v0.1.0) - 2018-08-09 [![Github Releases (by Release)](https://img.shields.io/github/downloads/shenwei356/unikmer/v0.1.0/total.svg)](https://github.com/shenwei356/unikmer/releases/tag/v0.1.0)
+
+- first release
diff -pruN 0.18.8-1/docs/index.md 0.19.0-1/docs/index.md
--- 0.18.8-1/docs/index.md	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/docs/index.md	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,301 @@
+# unikmer: Toolkit for k-mer with taxonomic information
+
+`unikmer` is a toolkit for nucleic acid [k-mer](https://en.wikipedia.org/wiki/K-mer) analysis, 
+providing functions
+including set operations on k-mers (sketches), optionally with
+TaxIds but without count information.
+
+K-mers are either encoded (k<=32) or hashed (arbitrary k) into `uint64`,
+and serialized in binary file with extension `.unik`.
+
+TaxIds can be assigned when counting k-mers from genome sequences,
+and LCA (Lowest Common Ancestor) is computed during set operations
+including computing union, intersection, set difference, unique and
+repeated k-mers.
+
+<!-- START doctoc generated TOC please keep comment here to allow auto update -->
+<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
+## Table of Contents
+
+- [Using cases](#using-cases)
+- [Installation](#installation)
+- [Commands](#commands)
+- [Binary file](#binary-file)
+- [Quick start](#quick-start)
+- [Support](#support)
+- [License](#license)
+
+<!-- END doctoc generated TOC please keep comment here to allow auto update -->
+
+## Using cases
+
+- Finding conserved regions in all genomes of a species.
+- Finding species/strain-specific sequences for designing probes/primers.
+
+## Installation
+
+1. Downloading [executable binary files](https://github.com/shenwei356/unikmer/releases).
+
+1. Via Bioconda [![Anaconda Cloud](https://anaconda.org/bioconda/unikmer/badges/version.svg)](https://anaconda.org/bioconda/unikmer) [![downloads](https://anaconda.org/bioconda/unikmer/badges/downloads.svg)](https://anaconda.org/bioconda/unikmer)
+
+        conda install -c bioconda unikmer
+
+## Commands
+
+[Usages](https://bioinf.shenwei.me/unikmer/usage)
+
+1. Counting
+
+        count           Generate k-mers (sketch) from FASTA/Q sequences
+
+1. Information
+
+        info            Information of binary files
+        num             Quickly inspect number of k-mers in binary files
+
+1. Format conversion
+
+        view            Read and output binary format to plain text
+        dump            Convert plain k-mer text to binary format
+
+        encode          Encode plain k-mer text to integer
+        decode          Decode encoded integer to k-mer text
+        
+
+1. Set operations
+
+        concat          Concatenate multiple binary files without removing duplicates
+        inter           Intersection of multiple binary files
+        common          Find k-mers shared by most of multiple binary files
+        union           Union of multiple binary files
+        diff            Set difference of multiple binary files
+
+1. Split and merge
+
+        sort            Sort k-mers in binary files to reduce file size
+        split           Split k-mers into sorted chunk files
+        tsplit          Split k-mers according to TaxId
+        merge           Merge k-mers from sorted chunk files
+
+1. Subset
+
+        head            Extract the first N k-mers
+        sample          Sample k-mers from binary files
+        grep            Search k-mers from binary files
+        filter          Filter out low-complexity k-mers
+        rfilter         Filter k-mers by taxonomic rank
+
+1. Searching on genomes
+
+        locate          Locate k-mers in genome
+        uniqs           Mapping k-mers back to genome and find unique subsequences
+
+1. Misc
+
+        autocompletion  Generate shell autocompletion script
+        version         Print version information and check for update
+
+## Binary file
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/shenwei356/unik.svg)](https://pkg.go.dev/github.com/shenwei356/unik)
+
+K-mers (represented in `uint64` in RAM) are serialized in 8-Byte
+(or less Bytes for shorter k-mers in compact format,
+or much less Bytes for sorted k-mers) arrays and
+optionally compressed in gzip format with extension of `.unik`.
+TaxIds are optionally stored next to k-mers with 4 or less bytes.
+
+### Compression ratio comparison
+
+No TaxIds stored in this test.
+
+![cr.jpg](testdata/cr.jpg)
+
+label           |encoded-kmer<sup>a</sup>|gzip-compressed<sup>b</sup>|compact-format<sup>c</sup>|sorted<sup>d</sup>|comment
+:---------------|:----------------------:|:-------------------------:|:------------------------:|:----------------:|:------------------------------------------------------
+`plain`         |                        |                           |                          |                  |plain text
+`gzip`          |                        |✔                          |                          |                  |gzipped plain text
+`unik.default`  |✔                       |✔                          |                          |                  |gzipped encoded k-mers in fixed-length byte array
+`unik.compat`   |✔                       |✔                          |✔                         |                  |gzipped encoded k-mers in shorter fixed-length byte array
+`unik.sorted`   |✔                       |✔                          |                          |✔                 |gzipped sorted encoded k-mers
+
+
+- <sup>a</sup> One k-mer is encoded as `uint64` and serialized in 8 Bytes.
+- <sup>b</sup> K-mers file is compressed in gzip format by default,
+  users can switch on global option `-C/--no-compress` to output non-compressed file.
+- <sup>c</sup> One k-mer is encoded as `uint64` and serialized in 8 Bytes by default.
+ However fewer Bytes are needed for short k-mers, e.g., 4 Bytes are enough for
+  15-mers (30 bits). This makes the file more compact with smaller file size,
+  controlled by global option `-c/--compact `.
+- <sup>d</sup> One k-mer is encoded as `uint64`, all k-mers are sorted and compressed
+  using varint-GB algorithm.
+- In all test, flag `--canonical` is ON when running `unikmer count`.
+
+
+## Quick Start
+
+
+    # memusg is for compute time and RAM usage: https://github.com/shenwei356/memusg
+
+
+    # counting (only keep the canonical k-mers and compact output)
+    # memusg -t unikmer count -k 23 Ecoli-IAI39.fasta.gz -o Ecoli-IAI39.fasta.gz.k23 --canonical --compact
+    $ memusg -t unikmer count -k 23 Ecoli-MG1655.fasta.gz -o Ecoli-MG1655.fasta.gz.k23 --canonical --compact
+    elapsed time: 0.897s
+    peak rss: 192.41 MB
+
+
+    # counting (only keep the canonical k-mers and sort k-mers)
+    # memusg -t unikmer count -k 23 Ecoli-IAI39.fasta.gz -o Ecoli-IAI39.fasta.gz.k23.sorted --canonical --sort
+    $ memusg -t unikmer count -k 23 Ecoli-MG1655.fasta.gz -o Ecoli-MG1655.fasta.gz.k23.sorted --canonical --sort
+    elapsed time: 1.136s
+    peak rss: 227.28 MB
+    
+    
+    # counting and assigning global TaxIds
+    $ unikmer count -k 23 -K -s Ecoli-IAI39.fasta.gz -o Ecoli-IAI39.fasta.gz.k23.sorted   -t 585057
+    $ unikmer count -k 23 -K -s Ecoli-MG1655.fasta.gz -o Ecoli-MG1655.fasta.gz.k23.sorted -t 511145
+    $ unikmer count -k 23 -K -s A.muciniphila-ATCC_BAA-835.fasta.gz -o A.muciniphila-ATCC_BAA-835.fasta.gz.sorted -t 349741
+    
+    # counting minimizer and outputting in linear order
+    $ unikmer count -k 23 -W 5 -H -K -l A.muciniphila-ATCC_BAA-835.fasta.gz -o A.muciniphila-ATCC_BAA-835.fasta.gz.m
+
+    # view
+    $ unikmer view Ecoli-MG1655.fasta.gz.k23.sorted.unik --show-taxid | head -n 3
+    AAAAAAAAACCATCCAAATCTGG 511145
+    AAAAAAAAACCGCTAGTATATTC 511145
+    AAAAAAAAACCTGAAAAAAACGG 511145
+    
+    # view (hashed k-mers needs original FASTA/Q file)
+    $ unikmer view --show-code --genome A.muciniphila-ATCC_BAA-835.fasta.gz A.muciniphila-ATCC_BAA-835.fasta.gz.m.unik | head -n 3
+    CATCCGCCATCTTTGGGGTGTCG 1210726578792
+    AGCGCAAAATCCCCAAACATGTA 2286899379883
+    AACTGATTTTTGATGATGACTCC 3542156397282
+    
+    # find the positions of k-mers
+    $ seqkit locate -M A.muciniphila-ATCC_BAA-835.fasta.gz \
+        -f <(unikmer view -a -g A.muciniphila-ATCC_BAA-835.fasta.gz A.muciniphila-ATCC_BAA-835.fasta.gz.m.unik | seqkit head -n 5 ) \
+        | csvtk sort -t -k start:n | head -n 6 | csvtk pretty -t
+    seqID         patternName           pattern                   strand   start   end
+    -----------   -------------------   -----------------------   ------   -----   ---
+    NC_010655.1   2090893901864583115   ATCTTATAAAATAACCACATAAC   +        3       25
+    NC_010655.1   696051979077366638    TTATAAAATAACCACATAACTTA   +        6       28
+    NC_010655.1   390297872016815006    TATAAAATAACCACATAACTTAA   +        7       29
+    NC_010655.1   2582400417208090837   AAAATAACCACATAACTTAAAAA   +        10      32
+    NC_010655.1   3048591415312050785   TAACCACATAACTTAAAAAGAAT   +        14      36
+    
+    # stats
+    $ unikmer stats *.unik -a -j 10
+    file                                              k  canonical  hashed  scaled  include-taxid  global-taxid  sorted  compact  gzipped  version     number  description
+    A.muciniphila-ATCC_BAA-835.fasta.gz.m.unik       23          ✓       ✓       ✕              ✕                     ✕        ✕        ✓     v5.0    860,900  
+    A.muciniphila-ATCC_BAA-835.fasta.gz.sorted.unik  23          ✓       ✕       ✕              ✕        349741       ✓        ✕        ✓     v5.0  2,630,905  
+    Ecoli-IAI39.fasta.gz.k23.sorted.unik             23          ✓       ✕       ✕              ✕        585057       ✓        ✕        ✓     v5.0  4,902,266  
+    Ecoli-IAI39.fasta.gz.k23.unik                    23          ✓       ✕       ✕              ✕                     ✕        ✓        ✓     v5.0  4,902,266  
+    Ecoli-MG1655.fasta.gz.k23.sorted.unik            23          ✓       ✕       ✕              ✕        511145       ✓        ✕        ✓     v5.0  4,546,632  
+    Ecoli-MG1655.fasta.gz.k23.unik                   23          ✓       ✕       ✕              ✕                     ✕        ✓        ✓     v5.0  4,546,632 
+
+    
+    # concat
+    $ memusg -t unikmer concat *.k23.sorted.unik -o concat.k23 -c
+    elapsed time: 1.020s
+    peak rss: 25.86 MB
+
+
+    
+    # union
+    $ memusg -t unikmer union *.k23.sorted.unik -o union.k23 -s
+    elapsed time: 3.991s
+    peak rss: 590.92 MB
+    
+    
+    # or sorting with limited memory.
+    # note that taxonomy database need some memory.
+    $ memusg -t unikmer sort *.k23.sorted.unik -o union2.k23 -u -m 1M
+    elapsed time: 3.538s
+    peak rss: 324.2 MB
+    
+    $ unikmer view -t union.k23.unik | md5sum 
+    4c038832209278840d4d75944b29219c  -
+    $ unikmer view -t union2.k23.unik | md5sum 
+    4c038832209278840d4d75944b29219c  -
+    
+    
+    # duplicate k-mers
+    $ memusg -t unikmer sort *.k23.sorted.unik -o dup.k23 -d -m 1M
+    elapsed time: 1.143s
+    peak rss: 240.18 MB
+
+    
+    # intersection
+    $ memusg -t unikmer inter *.k23.sorted.unik -o inter.k23
+    elapsed time: 1.481s
+    peak rss: 399.94 MB
+    
+
+    # difference
+    $ memusg -t unikmer diff -j 10 *.k23.sorted.unik -o diff.k23 -s
+    elapsed time: 0.793s
+    peak rss: 338.06 MB
+
+
+    $ ls -lh *.unik
+    -rw-r--r-- 1 shenwei shenwei 9.5M  2月 13 00:55 A.muciniphila-ATCC_BAA-835.fasta.gz.sorted.unik
+    -rw-r--r-- 1 shenwei shenwei  46M  2月 13 00:59 concat.k23.unik
+    -rw-r--r-- 1 shenwei shenwei 8.7M  2月 13 01:00 diff.k23.unik
+    -rw-r--r-- 1 shenwei shenwei  11M  2月 13 01:04 dup.k23.unik
+    -rw-r--r-- 1 shenwei shenwei  18M  2月 13 00:55 Ecoli-IAI39.fasta.gz.k23.sorted.unik
+    -rw-r--r-- 1 shenwei shenwei  21M  2月 13 00:48 Ecoli-IAI39.fasta.gz.k23.unik
+    -rw-r--r-- 1 shenwei shenwei  17M  2月 13 00:55 Ecoli-MG1655.fasta.gz.k23.sorted.unik
+    -rw-r--r-- 1 shenwei shenwei  19M  2月 13 00:48 Ecoli-MG1655.fasta.gz.k23.unik
+    -rw-r--r-- 1 shenwei shenwei 9.5M  2月 13 00:59 inter.k23.unik
+    -rw-r--r-- 1 shenwei shenwei  27M  2月 13 01:04 union2.k23.unik
+    -rw-r--r-- 1 shenwei shenwei  27M  2月 13 00:58 union.k23.unik
+
+
+    $ unikmer stats *.unik -a -j 10
+    file                                              k  canonical  hashed  scaled  include-taxid  global-taxid  sorted  compact  gzipped  version     number  description
+    A.muciniphila-ATCC_BAA-835.fasta.gz.m.unik       23          ✓       ✓       ✕              ✕                     ✕        ✕        ✓     v5.0    860,900  
+    A.muciniphila-ATCC_BAA-835.fasta.gz.sorted.unik  23          ✓       ✕       ✕              ✕        349741       ✓        ✕        ✓     v5.0  2,630,905  
+    concat.k23.unik                                  23          ✓       ✕       ✕              ✓                     ✕        ✓        ✓     v5.0         -1  
+    diff.k23.unik                                    23          ✓       ✕       ✕              ✓                     ✕        ✕        ✓     v5.0  2,326,096  
+    dup.k23.unik                                     23          ✓       ✕       ✕              ✓                     ✓        ✕        ✓     v5.0          0  
+    Ecoli-IAI39.fasta.gz.k23.sorted.unik             23          ✓       ✕       ✕              ✕        585057       ✓        ✕        ✓     v5.0  4,902,266  
+    Ecoli-IAI39.fasta.gz.k23.unik                    23          ✓       ✕       ✕              ✕                     ✕        ✓        ✓     v5.0  4,902,266  
+    Ecoli-MG1655.fasta.gz.k23.sorted.unik            23          ✓       ✕       ✕              ✕        511145       ✓        ✕        ✓     v5.0  4,546,632  
+    Ecoli-MG1655.fasta.gz.k23.unik                   23          ✓       ✕       ✕              ✕                     ✕        ✓        ✓     v5.0  4,546,632  
+    inter.k23.unik                                   23          ✓       ✕       ✕              ✓                     ✓        ✕        ✓     v5.0  2,576,170  
+    union2.k23.unik                                  23          ✓       ✕       ✕              ✓                     ✓        ✕        ✓     v5.0  6,872,728  
+    union.k23.unik                                   23          ✓       ✕       ✕              ✓                     ✓        ✕        ✓     v5.0  6,872,728
+
+
+    # -----------------------------------------------------------------------------------------
+
+    # mapping k-mers to genome
+    g=Ecoli-IAI39.fasta
+    f=inter.k23.unik
+
+    # to fasta
+    unikmer view $f -a -o $f.fa.gz
+
+    # make index
+    bwa index $g; samtools faidx $g
+
+    ncpu=12
+    ls $f.fa.gz \
+        | rush -j 1 -v ref=$g -v j=$ncpu \
+            'bwa aln -o 0 -l 17 -k 0 -t {j} {ref} {} \
+                | bwa samse {ref} - {} \
+                | samtools view -bS > {}.bam; \
+             samtools sort -T {}.tmp -@ {j} {}.bam -o {}.sorted.bam; \
+             samtools index {}.sorted.bam; \
+             samtools flagstat {}.sorted.bam > {}.sorted.bam.flagstat; \
+             /bin/rm {}.bam '  
+
+## Support
+
+Please [open an issue](https://github.com/shenwei356/unikmer/issues) to report bugs,
+propose new functions or ask for help.
+
+## License
+
+[MIT License](https://github.com/shenwei356/unikmer/blob/master/LICENSE)
Binary files 0.18.8-1/docs/testdata/cr.jpg and 0.19.0-1/docs/testdata/cr.jpg differ
diff -pruN 0.18.8-1/docs/usage.md 0.19.0-1/docs/usage.md
--- 0.18.8-1/docs/usage.md	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/docs/usage.md	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,735 @@
+# Usage
+
+## summary
+
+|Category            |Command|Input       |In.sorted        |In.flag-consistency|Output   |Out.sorted  |Out.unique  |
+|:-------------------|:------|:-----------|:----------------|:------------------|:--------|:-----------|:-----------|
+|Counting            |count  |fastx       |/                |/                  |.unik    |optional    |optional    |
+|Information         |info   |.unik       |optional         |no need            |tsv      |/           |/           |
+|                    |num    |.unik       |optional         |no need            |tsv      |/           |/           |
+|Format conversion   |view   |.unik       |optional         |required           |tsv      |/           |/           |
+|                    |dump   |tsv         |optional         |/                  |.unik    |optional    |follow input|
+|                    |encode |tsv         |/                |/                  |tsv      |/           |/           |
+|                    |decode |tsv         |/                |/                  |tsv      |/           |/           |
+|Set operations      |concat |.unik       |optional         |required           |.unik    |optional    |no          |
+|                    |inter  |.unik       |required         |required           |.unik    |yes         |yes         |
+|                    |common |.unik       |required         |required           |.unik    |yes         |yes         |
+|                    |union  |.unik       |optional         |required           |.unik    |optional    |yes         |
+|                    |diff   |.unik       |1th file required|required           |.unik    |optional    |yes         |
+|Split and merge     |sort   |.unik       |optional         |required           |.unik    |yes         |optional    |
+|                    |split  |.unik       |optional         |required           |.unik    |yes         |optional    |
+|                    |tsplit |.unik       |required         |required           |.unik    |yes         |yes         |
+|                    |merge  |.unik       |required         |required           |.unik    |yes         |optional    |
+|Subset              |head   |.unik       |optional         |required           |.unik    |follow input|follow input|
+|                    |sample |.unik       |optional         |required           |.unik    |follow input|follow input|
+|                    |grep   |.unik       |optional         |required           |.unik    |follow input|optional    |
+|                    |filter |.unik       |optional         |required           |.unik    |follow input|follow input|
+|                    |rfilter|.unik       |optional         |required           |.unik    |follow input|follow input|
+|Searching on genomes|locate |.unik, fasta|optional         |required           |tsv      |/           |/           |
+|                    |uniqs  |.unik, fasta|optional         |required           |bed/fasta|/           |/           |
+
+## unikmer
+
+```text
+unikmer - Toolkit for k-mer with taxonomic information
+
+unikmer is a toolkit for nucleic acid k-mer analysis, providing functions
+including set operation on k-mers optional with TaxIds but without count
+information.
+
+K-mers are either encoded (k<=32) or hashed (arbitrary k) into 'uint64',
+and serialized in binary file with extension '.unik'.
+
+TaxIds can be assigned when counting k-mers from genome sequences,
+and LCA (Lowest Common Ancestor) is computed during set operations
+including computing union, intersection, set difference, unique and
+repeated k-mers.
+
+Version: v0.19.0
+
+Author: Wei Shen <shenwei356@gmail.com>
+
+Documents  : https://bioinf.shenwei.me/unikmer
+Source code: https://github.com/shenwei356/unikmer
+
+Dataset (optional):
+
+  Manipulating k-mers with TaxIds needs taxonomy file from e.g., 
+  NCBI Taxonomy database, please extract "nodes.dmp", "names.dmp",
+  "delnodes.dmp" and "merged.dmp" from link below into ~/.unikmer/ ,
+  ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz , 
+  or some other directory, and later you can refer to using flag
+  --data-dir or environment variable UNIKMER_DB.
+
+  For GTDB, use 'taxonkit create-taxdump' to create NCBI-style
+  taxonomy dump files, or download from:
+    https://github.com/shenwei356/gtdb-taxonomy
+
+  Note that TaxIds are represented using uint32 and stored in 4 or
+  less bytes, all TaxIds should be in the range of [1, 4294967295]
+
+Usage:
+  unikmer [command] 
+
+Available Commands:
+  autocompletion Generate shell autocompletion script (bash|zsh|fish|powershell)
+  common         Find k-mers shared by most of multiple binary files
+  concat         Concatenate multiple binary files without removing duplicates
+  count          Generate k-mers (sketch) from FASTA/Q sequences
+  decode         Decode encoded integer to k-mer text
+  diff           Set difference of multiple binary files
+  dump           Convert plain k-mer text to binary format
+  encode         Encode plain k-mer text to integer
+  filter         Filter out low-complexity k-mers (experimental)
+  grep           Search k-mers from binary files
+  head           Extract the first N k-mers
+  info           Information of binary files
+  inter          Intersection of multiple binary files
+  locate         Locate k-mers in genome
+  merge          Merge k-mers from sorted chunk files
+  num            Quickly inspect number of k-mers in binary files
+  rfilter        Filter k-mers by taxonomic rank
+  sample         Sample k-mers from binary files
+  sort           Sort k-mers in binary files to reduce file size
+  split          Split k-mers into sorted chunk files
+  tsplit         Split k-mers according to taxid
+  union          Union of multiple binary files
+  uniqs          Mapping k-mers back to genome and find unique subsequences
+  version        Print version information and check for update
+  view           Read and output binary format to plain text
+
+Flags:
+  -c, --compact                 write compact binary file with little loss of speed
+      --compression-level int   compression level (default -1)
+      --data-dir string         directory containing NCBI Taxonomy files, including nodes.dmp,
+                                names.dmp, merged.dmp and delnodes.dmp (default "/home/shenwei/.unikmer")
+  -h, --help                    help for unikmer
+  -I, --ignore-taxid            ignore taxonomy information
+  -i, --infile-list string      file of input files list (one file per line), if given, they are
+                                appended to files from cli arguments
+      --max-taxid uint32        for smaller TaxIds, we can use less space to store TaxIds. default value
+                                is 1<<32-1, that's enough for NCBI Taxonomy TaxIds (default 4294967295)
+  -C, --no-compress             do not compress binary file (not recommended)
+      --nocheck-file            do not check binary file, when using process substitution or named pipe
+  -j, --threads int             number of CPUs to use (default 4)
+      --verbose                 print verbose information
+```
+     
+## count
+
+```text
+Generate k-mers (sketch) from FASTA/Q sequences
+
+K-mer:
+  1. K-mer code (k<=32)
+  2. Hashed k-mer (ntHash)
+
+K-mer sketches:
+  1. Scaled MinHash
+  2. Minimizer
+  3. Closed Syncmer
+
+Usage:
+  unikmer count [flags] -K -k <k> -u -s [-t <taxid>] <seq files> -o <out prefix>
+
+Flags:
+  -K, --canonical                   only keep the canonical k-mers
+      --circular                    circular genome
+  -H, --hash                        save hash of k-mer, automatically on for k>32. This flag overrides
+                                    global flag -c/--compact
+  -h, --help                        help for count
+  -k, --kmer-len int                k-mer length
+  -l, --linear                      output k-mers in linear order
+  -W, --minimizer-w int             minimizer window size
+  -V, --more-verbose                print extra verbose information
+  -o, --out-prefix string           out file prefix ("-" for stdout) (default "-")
+  -T, --parse-taxid                 parse taxid from FASTA/Q header
+  -r, --parse-taxid-regexp string   regular expression for parsing taxid
+  -d, --repeated                    only count duplicate k-mers, for removing singleton in FASTQ
+  -D, --scale int                   scale/down-sample factor (default 1)
+  -B, --seq-name-filter strings     list of regular expressions for filtering out sequences by
+                                    header/name, case ignored.
+  -s, --sort                        sort k-mers, this significantly reduce file size for k<=25. This
+                                    flag overrides global flag -c/--compact
+  -S, --syncmer-s int               closed syncmer length
+  -t, --taxid uint32                global taxid
+  -u, --unique                      only count unique k-mers, which are not duplicate
+
+```
+
+## info
+
+```text
+Information of binary files
+
+Tips:
+  1. For lots of small files (especially on SSD), use big value of '-j' to
+     parallelize counting.
+
+Usage:
+  unikmer info [flags] 
+
+Aliases:
+  info, stats
+
+Flags:
+  -a, --all                   all information, including number of k-mers
+  -b, --basename              only output basename of files
+  -h, --help                  help for info
+  -o, --out-file string       out file ("-" for stdout, suffix .gz for gzipped out) (default "-")
+  -e, --skip-err              skip error, only show warning message
+      --symbol-false string   symbol for false (default "✕")
+      --symbol-true string    symbol for true (default "✓")
+  -T, --tabular               output in machine-friendly tabular format
+
+```
+
+
+## view
+
+```text
+Read and output binary format to plain text
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+
+Usage:
+  unikmer view [flags] 
+
+Flags:
+  -a, --fasta             output in FASTA format, with encoded integer as FASTA header
+  -q, --fastq             output in FASTQ format, with encoded integer as FASTQ header
+  -g, --genome strings    genomes in (gzipped) fasta file(s) for decoding hashed k-mers
+  -h, --help              help for view
+  -o, --out-file string   out file ("-" for stdout, suffix .gz for gzipped out) (default "-")
+  -n, --show-code         show encoded integer along with k-mer
+  -N, --show-code-only    only show encoded integers, faster than cutting from result of -n/--show-code
+  -t, --show-taxid        show taxid
+  -T, --show-taxid-only   show taxid only
+
+```
+
+## dump
+
+```text
+Convert plain k-mer text to binary format
+
+Attentions:
+  1. Input should be one k-mer per line, or tab-delimited two columns
+     with a k-mer and it's taxid.
+  2. You can also assign a global taxid with flag -t/--taxid.
+
+Usage:
+  unikmer dump [flags] 
+
+Flags:
+  -K, --canonical           save the canonical k-mers
+  -O, --canonical-only      only save the canonical k-mers. This flag overrides -K/--canonical
+  -H, --hash                save hash of k-mer, automatically on for k>32. This flag overrides global
+                            flag -c/--compact
+      --hashed              giving hash values of k-mers. This flag overrides global flag -c/--compact
+  -h, --help                help for dump
+  -k, --kmer-len int        k-mer length
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -s, --sorted              input k-mers are sorted
+  -t, --taxid uint32        global taxid
+  -u, --unique              remove duplicate k-mers
+
+```
+
+## encode
+
+```text
+Encode plain k-mer text to integer
+
+Usage:
+  unikmer encode [flags] 
+
+Flags:
+  -a, --all               output all data: original k-mer, parsed k-mer, encoded integer, encode bits
+  -K, --canonical         keep the canonical k-mers
+  -H, --hash              save hash of k-mer, automatically on for k>32
+  -h, --help              help for encode
+  -o, --out-file string   out file ("-" for stdout, suffix .gz for gzipped out) (default "-")
+  
+```
+
+## decode
+
+```text
+Decode encoded integer to k-mer text
+
+Usage:
+  unikmer decode [flags] 
+
+Flags:
+  -a, --all               output all data: encoded integer, decoded k-mer
+  -h, --help              help for decode
+  -k, --kmer-len int      k-mer length
+  -o, --out-file string   out file ("-" for stdout, suffix .gz for gzipped out) (default "-")
+
+```
+
+## concat
+
+```text
+Concatenate multiple binary files without removing duplicates
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+
+Usage:
+  unikmer concat [flags] 
+
+Flags:
+  -h, --help                help for concat
+  -n, --number int          number of k-mers (default -1)
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -s, --sorted              input k-mers are sorted
+  -t, --taxid uint32        global taxid
+
+```
+
+## inter
+
+```text
+Intersection of multiple binary files
+
+Attentions:
+  0. All input files should be sorted, and output file is sorted.
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Taxid information could be inconsistent when using flag --mix-taxid.
+  
+Tips:
+  1. For comparing TWO files with really huge number of k-mers,
+     you can use 'unikmer sort -u -m 100M' for each file,
+     and then 'unikmer merge -' from them.
+  2. Put the smallest file in the beginning to reduce memory usage.
+
+Usage:
+  unikmer inter [flags] 
+
+Flags:
+  -h, --help                help for inter
+  -m, --mix-taxid           allow part of files being without taxids
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  
+```
+
+## common
+
+```text
+Find k-mers shared by most of multiple binary files
+
+This command is similar to "unikmer inter" but with looser restriction,
+k-mers shared by some number/proportion of multiple files are outputted.
+
+Attentions:
+  0. All input files should be sorted, and output file is sorted.
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Taxid information could be inconsistent when using flag --mix-taxid.
+  3. At most 65535 input files allowed.
+  
+Tips:
+  1. For comparing TWO files with really huge number of k-mers,
+     you can use 'unikmer sort -u -m 100M' for each file,
+     and then 'unikmer merge -' from them.
+  2. Put the smallest file in the beginning to reduce memory usage.
+
+Usage:
+  unikmer common [flags] 
+
+Flags:
+  -h, --help                help for common
+  -m, --mix-taxid           allow part of files being without taxids
+  -n, --number int          minimum number of files that share a k-mer (overrides -p/--proportion)
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -p, --proportion float    minimum proportion of files that share a k-mer (default 1)
+```
+
+## union
+
+```text
+Union of multiple binary files
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+
+Tips:
+  1. 'unikmer sort -u' is slightly faster in cost of more memory usage.
+  2. For really huge number of k-mers, you can use 'unikmer sort -m 100M -u'.
+  3. For large number of sorted .unik files, you can use 'unikmer merge'.
+
+Usage:
+  unikmer union [flags] 
+
+Flags:
+  -h, --help                help for union
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -s, --sort                sort k-mers, this significantly reduce file size for k<=25. This flag
+                            overrides global flag -c/--compact
+```
+
+## diff
+
+```text
+Set difference of multiple binary files
+
+Attentions:
+  0. The first file should be sorted.
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. By default taxids in the 2nd and later files are ignored.
+  3. You can switch on flag -t/--compare-taxid, and input
+     files should ALL have or don't have taxid information.
+     A same k-mer found but query taxid equals to target taxid,
+     or query taxid is ancestor of target taxid, this k-mer remains.
+
+Tips:
+  1. Increasing threads number (-j/--threads) to accelerate computation
+     when dealing with lots of files, in cost of more memory occupation.
+
+Usage:
+  unikmer diff [flags] 
+
+Flags:
+  -t, --compare-taxid       take taxid into consideration. type unikmer "diff -h" for detail
+  -h, --help                help for diff
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -s, --sort                sort k-mers, this significantly reduce file size for k<=25. This flag
+                            overrides global flag -c/--compact
+
+```
+
+## sort
+
+```text
+Sort k-mers in binary files to reduce file size
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+
+Notes:
+  1. When sorting from large number of files, this command is equivalent to
+     'unikmer split' + 'unikmer merge'.
+
+Tips:
+  1. You can use '-m/--chunk-size' to limit memory usage, and chunk file size
+     depends on k-mers and file save mode (sorted/compact/normal).
+  2. Increasing value of -j/--threads can accelerate splitting stage,
+     in cost of more memory occupation.
+  3. For sorted input files, the memory usage is very low and speed is fast.
+
+Usage:
+  unikmer sort [flags] 
+
+Flags:
+  -m, --chunk-size string    split input into chunks of N k-mers, supports K/M/G suffix, type "unikmer
+                             sort -h" for detail
+      --force                overwrite tmp dir
+  -h, --help                 help for sort
+  -k, --keep-tmp-dir         keep tmp dir
+  -M, --max-open-files int   max number of open files (default 400)
+  -o, --out-prefix string    out file prefix ("-" for stdout) (default "-")
+  -d, --repeated             only print duplicate k-mers
+  -t, --tmp-dir string       directory for intermediate files (default "./")
+  -u, --unique               remove duplicate k-mers
+
+```
+
+## split
+
+```text
+Split k-mers into sorted chunk files
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+  
+Tips:
+  1. You can use '-m/--chunk-size' to limit memory usage, and chunk file size
+     depends on k-mers and file save mode (sorted/compact/normal).
+  2. Increasing value of -j/--threads can accelerate splitting stage,
+     in cost of more memory occupation.
+  3. For sorted input files, the memory usage is very low and speed is fast.
+
+Usage:
+  unikmer split [flags] 
+
+Flags:
+  -m, --chunk-size string   split input into chunks of N k-mers, supports K/M/G suffix, type "unikmer
+                            sort -h" for detail
+      --force               overwrite output directory
+  -h, --help                help for split
+  -O, --out-dir string      output directory
+  -d, --repeated            split for further printing duplicate k-mers
+  -u, --unique              split for further removing duplicate k-mers
+
+```
+
+## tsplit
+
+```text
+Split k-mers according to taxid
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have taxid information.
+  3. Input files should be sorted using 'unikmer sort'.
+  4. All k-mers will loaded into RAM, for big input files,
+     you can 'split' them first, 'tsplit' and then 'concat'
+     for every taxid.
+  
+Tips:
+  1. Increasing value of -j/--threads can accelerate splitting stage,
+     in cost of more memory occupation.
+
+Usage:
+  unikmer tsplit [flags] 
+
+Flags:
+      --force               overwrite output directory
+  -h, --help                help for tsplit
+  -O, --out-dir string      output directory
+  -o, --out-prefix string   out file prefix (default "tsplit")
+
+```
+
+## merge
+
+```text
+Merge k-mers from sorted chunk files
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+  3. Input files should be sorted.
+  
+Tips:
+  1. If you don't need to compute unique or repeated k-mers, 
+     use 'unikmer concat -s', which is faster.
+
+Usage:
+  unikmer merge [flags] 
+
+Flags:
+      --force                overwrite tmp dir
+  -h, --help                 help for merge
+  -D, --is-dir               input files are directory containing chunk files
+  -k, --keep-tmp-dir         keep tmp dir
+  -M, --max-open-files int   max number of open files (default 400)
+  -o, --out-prefix string    out file prefix ("-" for stdout) (default "-")
+  -p, --pattern string       chunk file pattern (regular expression) (default "^chunk_\\d+\\.unik$")
+  -d, --repeated             only print duplicate k-mers
+  -t, --tmp-dir string       directory for intermediate files (default "./")
+  -u, --unique               remove duplicate k-mers
+
+```
+
+## head
+
+```text
+Extract the first N k-mers
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+
+Usage:
+  unikmer head [flags] 
+
+Flags:
+  -h, --help                help for head
+  -n, --number int          number of k-mers to extract (default 10)
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+
+```
+
+## sample
+
+```text
+Sample k-mers from binary files.
+
+The Sampling type is fixed sampling.
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Input files should ALL have or don't have taxid information.
+
+Usage:
+  unikmer sample [flags] 
+
+Flags:
+  -h, --help                help for sample
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -s, --start int           start location (default 1)
+  -w, --window int          window size (default 1)
+
+```
+
+## grep
+
+```text
+Search k-mers from binary files
+
+Attentions:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Canonical k-mers are used and outputted.
+  3. Input files should ALL have or don't have taxid information.
+
+Tips:
+  1. Increase value of '-j' for better performance when dealing with
+     lots of files, especially on SSD.
+  2. For searching using binary .unik file, use 'unikmer inter --mix-taxid',
+     which is faster than 'unikmer grep' in single-thread mode.
+
+Usage:
+  unikmer grep [flags] 
+
+Flags:
+  -D, --degenerate                query k-mers contains degenerate base
+      --force                     overwrite output directory
+  -h, --help                      help for grep
+  -v, --invert-match              invert the sense of matching, to select non-matching records
+  -m, --multiple-outfiles         write results into separated files for multiple input files
+  -O, --out-dir string            output directory (default "unikmer-grep")
+  -o, --out-prefix string         out file prefix ("-" for stdout) (default "-")
+  -S, --out-suffix string         output suffix (default ".grep")
+  -q, --query strings             query k-mers/taxids (multiple values delimited by comma supported)
+  -f, --query-file strings        query file (one k-mer/taxid per line)
+  -t, --query-is-taxid            queries are taxids
+  -F, --query-unik-file strings   query file in .unik format
+  -d, --repeated                  only print duplicate k-mers
+  -s, --sort                      sort k-mers, this significantly reduce file size for k<=25. This flag
+                                  overrides global flag -c/--compact
+  -u, --unique                    remove duplicate k-mers
+```
+
+## filter
+
+```text
+Filter out low-complexity k-mers (experimental)
+
+Attentions:
+  1. This command only detects single base repeat now.
+
+Usage:
+  unikmer filter [flags] 
+
+Flags:
+  -h, --help                help for filter
+  -v, --invert              invert result, i.e., output low-complexity k-mers
+  -o, --out-prefix string   out file prefix ("-" for stdout) (default "-")
+  -d, --penalty-d int       penalty for different bases (default 1)
+  -s, --penalty-s int       penalty for successive bases (default 3)
+  -t, --threshold int       penalty threshold for filter, higher is stricter (default 15)
+  -w, --window int          window size for checking penalty (default 7)
+
+```
+
+## rfilter
+
+```text
+Filter k-mers by taxonomic rank
+
+Attentions:
+  1. Flag -L/--lower-than and -H/--higher-than are exclusive, and can be
+     used along with -E/--equal-to which values can be different.
+  2. A list of pre-ordered ranks is in ~/.unikmer/ranks.txt, you can use
+     your list by -r/--rank-file, the format specification is below.
+  3. All ranks in taxonomy database should be defined in rank file.
+  4. Ranks can be removed with black list via -B/--black-list.
+  5. TaxIds with no rank can be optionally discarded by -N/--discard-noranks.
+  6. But when filtering with -L/--lower-than, you can use
+    -n/--save-predictable-norank to save some special ranks without order,
+    where rank of the closest higher node is still lower than rank cutoff.
+
+Rank file:
+  1. Blank lines or lines starting with "#" are ignored.
+  2. Ranks are in descending order and case ignored.
+  3. Ranks with same order should be in one line separated with comma (",", no space).
+  4. Ranks without order should be assigned a prefix symbol "!" for each rank.
+
+Usage:
+  unikmer rfilter [flags] 
+
+Flags:
+  -B, --black-list strings        black list of ranks to discard, e.g., '"no rank", "clade"'
+  -N, --discard-noranks           discard ranks without order, type "unikmer filter --help" for details
+  -R, --discard-root              discard root taxid, defined by --root-taxid
+  -E, --equal-to strings          output taxIDs with rank equal to some ranks, multiple values can be
+                                  separated with comma "," (e.g., -E "genus,species"), or give multiple
+                                  times (e.g., -E genus -E species)
+  -h, --help                      help for rfilter
+  -H, --higher-than string        output ranks higher than a rank, exclusive with --lower-than
+      --list-order                list defined ranks in order
+      --list-ranks                list ordered ranks in taxonomy database
+  -L, --lower-than string         output ranks lower than a rank, exclusive with --higher-than
+  -o, --out-prefix string         out file prefix ("-" for stdout) (default "-")
+  -r, --rank-file string          user-defined ordered taxonomic ranks, type "unikmer rfilter --help"
+                                  for details
+      --root-taxid uint32         root taxid (default 1)
+  -n, --save-predictable-norank   do not discard some special ranks without order when using -L, where
+                                  rank of the closest higher node is still lower than rank cutoff
+
+```
+
+## locate
+
+```text
+Locate k-mers in genome
+
+Attention:
+  0. All files should have the 'canonical' flag.
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Output is BED6 format.
+  3. When using experimental flag --circular, leading subsequence of k-1 bp
+     is appending to end of sequence. End position of k-mers that crossing
+     sequence end would be greater than sequence length.
+
+Usage:
+  unikmer locate [flags] 
+
+Flags:
+      --circular                  circular genome. type "unikmer locate -h" for details
+  -g, --genome strings            genomes in (gzipped) fasta file(s)
+  -h, --help                      help for locate
+  -o, --out-prefix string         out file prefix ("-" for stdout) (default "-")
+  -B, --seq-name-filter strings   list of regular expressions for filtering out sequences by
+                                  header/name, case ignored.
+
+```
+
+## uniqs
+
+```text
+Mapping k-mers back to genome and find unique subsequences
+
+Attention:
+  1. The 'canonical/scaled/hashed' flags of all files should be consistent.
+  2. Default output is in BED3 format, with left-closed and right-open
+     0-based interval.
+  3. When using flag --circular, end position of subsequences that 
+     crossing genome sequence end would be greater than sequence length.
+
+Usage:
+  unikmer uniqs [flags] 
+
+Flags:
+  -M, --allow-multiple-mapped-kmer        allow multiple mapped k-mers
+      --circular                          circular genome. type "unikmer uniqs -h" for details
+  -g, --genome strings                    genomes in (gzipped) fasta file(s)
+  -h, --help                              help for uniqs
+  -x, --max-cont-non-uniq-kmers int       max continuous non-unique k-mers
+  -X, --max-num-cont-non-uniq-kmers int   max number of continuous non-unique k-mers
+  -m, --min-len int                       minimum length of subsequence (default 200)
+  -o, --out-prefix string                 out file prefix ("-" for stdout) (default "-")
+  -a, --output-fasta                      output fasta format instead of BED3
+  -B, --seq-name-filter strings           list of regular expressions for filtering out sequences by
+                                          header/name, case ignored.
+  -W, --seqs-in-a-file-as-one-genome      treat seqs in a genome file as one genome
+
+```
diff -pruN 0.18.8-1/.gitignore 0.19.0-1/.gitignore
--- 0.18.8-1/.gitignore	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/.gitignore	2022-04-25 12:44:35.000000000 +0000
@@ -14,7 +14,7 @@
 *.directory
 unikmer/unikmer*
 unikmer/binaries*
-doc/site/*
+site
 
 *ssshtest
 *.unik
@@ -22,3 +22,5 @@ t_*
 *.nextflow.log*
 *.brename_detail.txt
 */Rplots.pdf
+
+*.unik.*
diff -pruN 0.18.8-1/go.mod 0.19.0-1/go.mod
--- 0.18.8-1/go.mod	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/go.mod	2022-04-25 12:44:35.000000000 +0000
@@ -4,30 +4,33 @@ go 1.17
 
 require (
 	github.com/dustin/go-humanize v1.0.0
-	github.com/klauspost/compress v1.13.6
+	github.com/klauspost/compress v1.15.1
 	github.com/klauspost/pgzip v1.2.5
-	github.com/mattn/go-colorable v0.1.8
+	github.com/mattn/go-colorable v0.1.12
 	github.com/mitchellh/go-homedir v1.1.0
 	github.com/pkg/errors v0.9.1
-	github.com/shenwei356/bio v0.3.1
-	github.com/shenwei356/breader v0.3.1
+	github.com/shenwei356/bio v0.7.1
+	github.com/shenwei356/breader v0.3.2
 	github.com/shenwei356/go-logging v0.0.0-20171012171522-c6b9702d88ba
-	github.com/shenwei356/util v0.3.2
-	github.com/shenwei356/xopen v0.1.0
-	github.com/spf13/cobra v1.2.1
+	github.com/shenwei356/kmers v0.1.0
+	github.com/shenwei356/unik/v5 v5.0.1
+	github.com/shenwei356/util v0.5.0
+	github.com/spf13/cobra v1.4.0
 	github.com/tatsushid/go-prettytable v0.0.0-20141013043238-ed2d14c29939
 	github.com/twotwotwo/sorts v0.0.0-20160814051341-bf5c1f2b8553
 	github.com/will-rowe/nthash v0.4.0
-	github.com/zeebo/wyhash v0.0.1
 )
 
 require (
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
-	github.com/mattn/go-isatty v0.0.12 // indirect
+	github.com/kr/pretty v0.3.0 // indirect
+	github.com/mattn/go-isatty v0.0.14 // indirect
 	github.com/mattn/go-runewidth v0.0.13 // indirect
 	github.com/rivo/uniseg v0.2.0 // indirect
-	github.com/shenwei356/bpool v0.0.0-20160710042833-f9e0ee4d0403 // indirect
 	github.com/shenwei356/natsort v0.0.0-20190418160752-600d539c017d // indirect
+	github.com/shenwei356/xopen v0.2.2 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
-	golang.org/x/sys v0.0.0-20210510120138-977fb7262007 // indirect
+	github.com/ulikunitz/xz v0.5.10 // indirect
+	github.com/zeebo/wyhash v0.0.1 // indirect
+	golang.org/x/sys v0.0.0-20211205182925-97ca703d548d // indirect
 )
diff -pruN 0.18.8-1/go.sum 0.19.0-1/go.sum
--- 0.18.8-1/go.sum	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/go.sum	2022-04-25 12:44:35.000000000 +0000
@@ -56,6 +56,8 @@ github.com/cncf/udpa/go v0.0.0-202011202
 github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk=
 github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
 github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
+github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
+github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
 github.com/cznic/mathutil v0.0.0-20181122101859-297441e03548/go.mod h1:e6NPNENfs9mPDVNRekM7lKScauxd5kXTr1Mfyig6TDM=
 github.com/cznic/sortutil v0.0.0-20181122101858-f5f958428db8 h1:LpMLYGyy67BoAFGda1NeOBQwqlv7nUXpm+rIVHGxZZ4=
 github.com/cznic/sortutil v0.0.0-20181122101858-f5f958428db8/go.mod h1:q2w6Bg5jeox1B+QkJ6Wp/+Vn0G/bo3f1uY7Fn3vivIQ=
@@ -172,24 +174,28 @@ github.com/jstemmer/go-junit-report v0.9
 github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
 github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
 github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
-github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
 github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.15.0/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.15.1 h1:y9FcTHGyrebwfP0ZZqFiaxTaiDnUrGkJkI+f583BL1A=
+github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
 github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE=
 github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
 github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
 github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
-github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
 github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
+github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
+github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
 github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
-github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
+github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
+github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/magiconair/properties v1.8.5/go.mod h1:y3VJvCyxH9uVvJTWEGAELF3aiYNyPKd5NZ3oSwXrF60=
 github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU=
-github.com/mattn/go-colorable v0.1.8 h1:c1ghPdyEDarC70ftn0y+A/Ee++9zz8ljHG1b13eJ0s8=
-github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
+github.com/mattn/go-colorable v0.1.12 h1:jF+Du6AlPIjs2BiUiQlKOX0rt3SujHxPnksPKZbaA40=
+github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4=
 github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
-github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY=
-github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU=
+github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y=
+github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94=
 github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
 github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
 github.com/miekg/dns v1.0.14/go.mod h1:W1PPwlIAgtquWBMBEV9nkV9Cazfe8ScdGz/Lj7v3Nrg=
@@ -206,6 +212,7 @@ github.com/mitchellh/mapstructure v1.4.1
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
+github.com/montanaflynn/stats v0.6.6/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
 github.com/pascaldekloe/goe v0.0.0-20180627143212-57f6aae5913c/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc=
 github.com/pelletier/go-toml v1.9.3/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
 github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -220,30 +227,39 @@ github.com/rivo/uniseg v0.2.0 h1:S1pD9we
 github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
 github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
+github.com/rogpeppe/go-internal v1.6.1 h1:/FiVV8dS/e+YqF2JvO3yXRFbBLTIuSDkuC7aBOAvL+k=
+github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
+github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
 github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
-github.com/shenwei356/bio v0.3.1 h1:rdSoslv8HahfJpkzhN6W1Ky1lQ++gaBIfuJAC4f65cc=
-github.com/shenwei356/bio v0.3.1/go.mod h1:rHA8DoaDooXKdqX7bqoksQDKm3mEAQ3MsD9nivVTSf0=
-github.com/shenwei356/bpool v0.0.0-20160710042833-f9e0ee4d0403 h1:/3JklLnHXiWUBxWc3joQYavDQJpncRhRA909cUb7eOw=
-github.com/shenwei356/bpool v0.0.0-20160710042833-f9e0ee4d0403/go.mod h1:YkgdTWfNnJgv5HVJbVSDmxQtkK3/jZWDoqcG26BVU8k=
-github.com/shenwei356/breader v0.3.1 h1:OjgfeHhpNGQPkS1+lgsl4eNuuO//Y16N6TkqG5oxO5U=
+github.com/shenwei356/bio v0.7.1 h1:hbIoANV2OQAbUA+xcLs503T/vHtjlSrT2sdSrUdases=
+github.com/shenwei356/bio v0.7.1/go.mod h1:neE5KTtMM9fqM41UgzoPUUxhs/sL7Ck1CIeDoYz+nQI=
 github.com/shenwei356/breader v0.3.1/go.mod h1:UR11JJCxU9s7eUdU4xn3L/VodxoXzWhjJPh8WZbb+us=
+github.com/shenwei356/breader v0.3.2 h1:GLy2clIMck6FdTwj8WLnmhv0PW/7Pp+Wcx7TVEHG0ks=
+github.com/shenwei356/breader v0.3.2/go.mod h1:BimwolkMTIr/O4iX7xXtjEB1z5y39G+8I5Tsm9guC3E=
 github.com/shenwei356/go-logging v0.0.0-20171012171522-c6b9702d88ba h1:UvnrxFDPmz7agYX0eQ2JEorTKn1ORnZ9dT5OzbjPvK8=
 github.com/shenwei356/go-logging v0.0.0-20171012171522-c6b9702d88ba/go.mod h1:LiqYp/K5yCEWOi7Ux/iOF/kjDxtsdYjOGcKHDbEOXFU=
+github.com/shenwei356/kmers v0.1.0 h1:zPmWftXQWDugG99Wxd3rFmCIF2QZEUpEba3jOSEn7nE=
+github.com/shenwei356/kmers v0.1.0/go.mod h1:23Ltr95n98LYy9OtJMFSzkmU/1nmdYwgzqB3walAQ6g=
 github.com/shenwei356/natsort v0.0.0-20190418160752-600d539c017d h1:eeXLHcXyGEr72V1SOSEI7vSzUOTJvHutwF7Ykm+hscQ=
 github.com/shenwei356/natsort v0.0.0-20190418160752-600d539c017d/go.mod h1:SiiGiRFyRtV7S9RamOrmQR5gpGIRhWJM1w0EtmuQ1io=
-github.com/shenwei356/util v0.3.2 h1:3qXkcO2erNKnPCnV8zxjN2JL5sGnOqW+muj1x4XxkuM=
-github.com/shenwei356/util v0.3.2/go.mod h1:pY/f5wR/0o0dJcodhw1Df/ghzqNt2wFSATW0zymI4mA=
-github.com/shenwei356/xopen v0.1.0 h1:PizY52rLA7A6EdkwKZ6A8h8/a+c9DCBXqfLtwVzsWnM=
+github.com/shenwei356/unik/v5 v5.0.1 h1:IzQRepKUgPeu8HaFIK0Rg1oikaVXnKhHJOtg/pNYPOc=
+github.com/shenwei356/unik/v5 v5.0.1/go.mod h1:XNQTXvHsRsHA5ImUX1HhhhHccDs2Bf0gQRA1Q4kH0Vo=
+github.com/shenwei356/util v0.5.0 h1:gbPuGYVggNLOSORuZLnpaB2DrIpyDFolHiZQkyja+XU=
+github.com/shenwei356/util v0.5.0/go.mod h1:goFN/u2HgvfbOsEgoHA2hUEet+9KjZpRavrVGz9cm30=
 github.com/shenwei356/xopen v0.1.0/go.mod h1:6EQUa6I7Zsl2GQKqcL9qGLrTzVE+oZyly+uhzovQYSk=
+github.com/shenwei356/xopen v0.2.1/go.mod h1:6EQUa6I7Zsl2GQKqcL9qGLrTzVE+oZyly+uhzovQYSk=
+github.com/shenwei356/xopen v0.2.2 h1:g1v3YjiIky9k6oN4qmnU1bDciAHnSrmOn2sMTE5pChY=
+github.com/shenwei356/xopen v0.2.2/go.mod h1:6EQUa6I7Zsl2GQKqcL9qGLrTzVE+oZyly+uhzovQYSk=
 github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
 github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
 github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
 github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I=
 github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
-github.com/spf13/cobra v1.2.1 h1:+KmjbUw1hriSNMF55oPrkZcb27aECyrj8V2ytv7kWDw=
 github.com/spf13/cobra v1.2.1/go.mod h1:ExllRjgxM/piMAM+3tAZvg8fsklGAf3tPfi+i8t68Nk=
+github.com/spf13/cobra v1.4.0 h1:y+wJpx64xcgO1V+RcnwW0LEHxTKRi2ZDPSBjWnrg88Q=
+github.com/spf13/cobra v1.4.0/go.mod h1:Wo4iy3BUC+X2Fybo0PDqwJIv3dNRiZLHQymsfxlB84g=
 github.com/spf13/jwalterweatherman v1.1.0/go.mod h1:aNWZUN0dPAAO/Ljvb5BEdw96iTZ0EXowPYD95IqWIGo=
 github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
 github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
@@ -260,6 +276,8 @@ github.com/tatsushid/go-prettytable v0.0
 github.com/tatsushid/go-prettytable v0.0.0-20141013043238-ed2d14c29939/go.mod h1:omGxs4/6hNjxPKUTjmaNkPzehSnNJOJN6pMEbrlYIT4=
 github.com/twotwotwo/sorts v0.0.0-20160814051341-bf5c1f2b8553 h1:DRC1ubdb3ZmyyIeCSTxjZIQAnpLPfKVgYrLETQuOPjo=
 github.com/twotwotwo/sorts v0.0.0-20160814051341-bf5c1f2b8553/go.mod h1:Rj7Csq/tZ/egz+Ltc2IVpsA5309AmSMEswjkTZmq2Xc=
+github.com/ulikunitz/xz v0.5.10 h1:t92gobL9l3HE202wg3rlk19F6X+JOxl9BBrCCMYEYd8=
+github.com/ulikunitz/xz v0.5.10/go.mod h1:nbz6k7qbPmH4IRqmfOplQw/tblSgqTqBwxkY0oWt/14=
 github.com/will-rowe/nthash v0.4.0 h1:YiHdqR0phP9o/kKVMJJiuXYY9qOH8QHofptDqUCOxrU=
 github.com/will-rowe/nthash v0.4.0/go.mod h1:5ezweuK0J5j+/7lih/RkrSmnxI3hoaPpQiVWJ7rd960=
 github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
@@ -399,7 +417,6 @@ golang.org/x/sys v0.0.0-20191005200804-a
 golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@@ -424,8 +441,11 @@ golang.org/x/sys v0.0.0-20210315160823-c
 golang.org/x/sys v0.0.0-20210320140829-1e4c9ba3b0c4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20210403161142-5e06dd20ab57/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.0.0-20210510120138-977fb7262007 h1:gG67DSER+11cZvqIMb8S8bt0vZtiN6xWYARwirrOSfE=
 golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/sys v0.0.0-20211205182925-97ca703d548d h1:FjkYO/PPp4Wi0EAUOVLxePm7qVW4r4ctbWpURyuOD0E=
+golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff -pruN 0.18.8-1/index/serialization.go 0.19.0-1/index/serialization.go
--- 0.18.8-1/index/serialization.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/index/serialization.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,361 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package index
-
-import (
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"io"
-	"strings"
-)
-
-// Version is the version of index format
-const Version uint8 = 2
-
-// Magic number of index file.
-var Magic = [8]byte{'.', 'u', 'n', 'i', 'k', 'i', 'd', 'x'}
-
-// ErrInvalidIndexFileFormat means invalid index format.
-var ErrInvalidIndexFileFormat = errors.New("unikmer/index: invalid index format")
-
-// ErrUnfishedWrite means writing not finished
-var ErrUnfishedWrite = errors.New("unikmer/index: index not fished writing")
-
-// ErrTruncateIndexFile means the file is truncated
-var ErrTruncateIndexFile = errors.New("unikmer/index: truncated index file")
-
-// ErrWrongWriteDataSize means the size of data to write is invalid
-var ErrWrongWriteDataSize = errors.New("unikmer/index: write data with wrong size")
-
-// ErrVersionMismatch means version mismatch between files and program
-var ErrVersionMismatch = errors.New("unikmer/index: version mismatch")
-
-// ErrNameAndSizeMismatch means size of names and sizes are not equal.
-var ErrNameAndSizeMismatch = errors.New("unikmer/index: size of names and sizes unequal")
-
-// ErrNameAndIndexMismatch means size of names and sizes are not equal.
-var ErrNameAndIndexMismatch = errors.New("unikmer/index: size of names and indices unequal")
-
-var be = binary.BigEndian
-
-// Header contains metadata
-type Header struct {
-	Version   uint8 // uint8
-	K         int   // uint8
-	Canonical bool  // uint8
-	NumHashes uint8 // uint8
-	NumSigs   uint64
-	Names     []string
-	Indices   []uint32
-	Sizes     []uint64
-
-	NumRowBytes int // length of bytes for storing one row of signiture for n names
-}
-
-func (h Header) String() string {
-	return fmt.Sprintf("unikmer index file v%d: k: %d, canonical: %v, #hashes: %d, #signatures: %d, names: %s",
-		h.Version, h.K, h.Canonical, h.NumHashes, h.NumSigs, strings.Join(h.Names, ", "))
-}
-
-// Compatible checks compatibility
-func (h Header) Compatible(b Header) bool {
-	if h.Version == b.Version &&
-		h.K == b.K &&
-		h.Canonical == b.Canonical &&
-		h.NumHashes == b.NumHashes {
-
-		return true
-	}
-	return false
-}
-
-// Reader is for reading KmerCode.
-type Reader struct {
-	Header
-	r io.Reader
-
-	count uint64
-}
-
-// NewReader returns a Reader.
-func NewReader(r io.Reader) (reader *Reader, err error) {
-	reader = &Reader{r: r}
-	err = reader.readHeader()
-	if err != nil {
-		return nil, err
-	}
-
-	reader.NumRowBytes = int((len(reader.Names) + 7) / 8)
-	return reader, nil
-}
-
-func (reader *Reader) readHeader() (err error) {
-	// check Magic number
-	var m [8]byte
-	r := reader.r
-	err = binary.Read(r, be, &m)
-	if err != nil {
-		return err
-	}
-	same := true
-	for i := 0; i < 8; i++ {
-		if Magic[i] != m[i] {
-			same = false
-			break
-		}
-	}
-	if !same {
-		return ErrInvalidIndexFileFormat
-	}
-
-	// 4 bytes meta info
-	var meta [4]uint8
-	err = binary.Read(r, be, &meta)
-	if err != nil {
-		return err
-	}
-	// check compatibility
-	if Version != meta[0] {
-		return ErrVersionMismatch
-	}
-	reader.Version = meta[0]
-	reader.K = int(meta[1])
-	if meta[2] > 0 {
-		reader.Canonical = true
-	}
-	reader.NumHashes = meta[3]
-
-	// 8 bytes signature size
-	err = binary.Read(r, be, &reader.NumSigs)
-	if err != nil {
-		return err
-	}
-
-	// 4 bytes length of Names
-	var n uint32
-	err = binary.Read(r, be, &n)
-	if err != nil {
-		return err
-	}
-
-	// Names
-	namesData := make([]byte, n)
-	err = binary.Read(r, be, &namesData)
-	if err != nil {
-		return err
-	}
-	names := strings.Split(string(namesData), "\n")
-	names = names[0 : len(names)-1]
-	reader.Names = names
-
-	// Indices
-	indicesData := make([]uint32, len(names))
-	err = binary.Read(r, be, &indicesData)
-	if err != nil {
-		return err
-	}
-	reader.Indices = indicesData
-
-	// Sizes
-	sizesData := make([]uint64, len(names))
-	err = binary.Read(r, be, &sizesData)
-	if err != nil {
-		return err
-	}
-	reader.Sizes = sizesData
-
-	return nil
-}
-
-// Read reads one code.
-func (reader *Reader) Read() ([]byte, error) {
-	data := make([]byte, reader.NumRowBytes)
-	nReaded, err := io.ReadFull(reader.r, data)
-	if err != nil {
-		if err == io.EOF {
-			if reader.count != reader.NumSigs {
-				return nil, ErrTruncateIndexFile
-			}
-		}
-		return nil, err
-	}
-	if nReaded < reader.NumRowBytes {
-		return nil, ErrTruncateIndexFile
-	}
-	reader.count++
-	return data, nil
-}
-
-// Writer writes KmerCode.
-type Writer struct {
-	Header
-	w           io.Writer
-	wroteHeader bool
-
-	count uint64
-}
-
-// NewWriter creates a Writer.
-func NewWriter(w io.Writer, k int, canonical bool, numHashes uint8, numSigs uint64, names []string, indices []uint32, sizes []uint64) (*Writer, error) {
-	if len(names) != len(sizes) {
-		return nil, ErrNameAndSizeMismatch
-	}
-	if len(names) != len(indices) {
-		return nil, ErrNameAndIndexMismatch
-	}
-
-	writer := &Writer{
-		Header: Header{
-			Version:   Version,
-			K:         k,
-			Canonical: canonical,
-			NumHashes: numHashes,
-			NumSigs:   numSigs,
-			Names:     names,
-			Indices:   indices,
-			Sizes:     sizes,
-		},
-		w: w,
-	}
-	writer.NumRowBytes = int((len(names) + 7) / 8)
-
-	return writer, nil
-}
-
-// WriteHeader writes file header
-func (writer *Writer) WriteHeader() (err error) {
-	if writer.wroteHeader {
-		return nil
-	}
-	w := writer.w
-
-	// 8 bytes magic number
-	err = binary.Write(w, be, Magic)
-	if err != nil {
-		return err
-	}
-
-	// 4 bytes meta info
-	var canonical uint8
-	if writer.Canonical {
-		canonical = 1
-	}
-	err = binary.Write(w, be, [4]uint8{writer.Version, uint8(writer.K), canonical, writer.NumHashes})
-	if err != nil {
-		return err
-	}
-
-	// 8 bytes signature size
-	err = binary.Write(w, be, writer.NumSigs)
-	if err != nil {
-		return err
-	}
-
-	// 4 bytes length of Names
-	var n int
-	for _, name := range writer.Names {
-		n += len(name) + 1
-	}
-
-	err = binary.Write(w, be, uint32(n))
-	if err != nil {
-		return err
-	}
-
-	// Names
-	for _, name := range writer.Names {
-		err = binary.Write(w, be, []byte(name+"\n"))
-		if err != nil {
-			return err
-		}
-	}
-
-	// Indices
-	err = binary.Write(w, be, writer.Indices)
-	if err != nil {
-		return err
-	}
-
-	// Sizes
-	err = binary.Write(w, be, writer.Sizes)
-	if err != nil {
-		return err
-	}
-
-	writer.wroteHeader = true
-	return nil
-}
-
-// Write writes some thing
-func (writer *Writer) Write(data []byte) (err error) {
-	if len(data) != writer.NumRowBytes {
-		return ErrWrongWriteDataSize
-	}
-
-	// lazily write header
-	if !writer.wroteHeader {
-		err = writer.WriteHeader()
-		if err != nil {
-			return err
-		}
-		writer.wroteHeader = true
-	}
-
-	_, err = writer.w.Write(data)
-	if err != nil {
-		return err
-	}
-
-	writer.count++
-	return nil
-}
-
-// WriteBatch writes a batch of data
-func (writer *Writer) WriteBatch(data []byte, n int) (err error) {
-	// lazily write header
-	if !writer.wroteHeader {
-		err = writer.WriteHeader()
-		if err != nil {
-			return err
-		}
-		writer.wroteHeader = true
-	}
-
-	_, err = writer.w.Write(data)
-	if err != nil {
-		return err
-	}
-
-	writer.count += uint64(n)
-	return nil
-}
-
-// Flush check completeness
-func (writer *Writer) Flush() (err error) {
-	if !writer.wroteHeader {
-		writer.WriteHeader()
-	}
-	if writer.count != writer.NumSigs {
-		return ErrUnfishedWrite
-	}
-	return nil
-}
diff -pruN 0.18.8-1/index/serialization_test.go 0.19.0-1/index/serialization_test.go
--- 0.18.8-1/index/serialization_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/index/serialization_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,166 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package index
-
-import (
-	"bufio"
-	"bytes"
-	"fmt"
-	"io"
-	"os"
-	"testing"
-)
-
-func TestIndexReadAndWrite(t *testing.T) {
-	file := "test.unikidx"
-	defer func() {
-		err := os.Remove(file)
-		if err != nil {
-			t.Errorf("clean error %s", err)
-		}
-	}()
-
-	k := 31
-	canonical := true
-	numHashes := uint8(1)
-	numSigs := uint64(2)
-	names := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i"}
-	indices := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9}
-	sizes := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9}
-	data := [][]byte{[]byte("aa"), []byte("bb")}
-	err := write(file, k, canonical, numHashes, numSigs, names, indices, sizes, data)
-	if err != nil {
-		t.Errorf("write error %s", err)
-	}
-
-	reader, datas, err := read(file)
-	if err != nil {
-		t.Errorf("read error %s", err)
-	}
-	if reader.K != k {
-		t.Errorf("unmatch k")
-	}
-
-	if reader.Canonical != canonical {
-		t.Errorf("unmatch canonical")
-	}
-
-	if reader.NumHashes != numHashes {
-		t.Errorf("unmatch NumHashes")
-	}
-	if reader.NumSigs != numSigs {
-		t.Errorf("unmatch NumSigs")
-	}
-	if len(reader.Names) != len(names) {
-		t.Errorf("unmatch names length")
-	}
-	for i, n := range names {
-		if reader.Names[i] != n {
-			t.Errorf("unmatch name")
-		}
-	}
-	if len(reader.Indices) != len(indices) {
-		t.Errorf("unmatch indices length")
-	}
-	for i, n := range indices {
-		if reader.Indices[i] != n {
-			t.Errorf("unmatch index")
-		}
-	}
-	if len(reader.Sizes) != len(sizes) {
-		t.Errorf("unmatch sizes length")
-	}
-	for i, n := range sizes {
-		if reader.Sizes[i] != n {
-			t.Errorf("unmatch size")
-		}
-	}
-	if len(datas) != len(data) {
-		t.Errorf("unmatch data length")
-	}
-	for i, d := range data {
-		if bytes.Compare(d, datas[i]) != 0 {
-			t.Errorf("unmatch data")
-		}
-	}
-
-}
-
-func write(file string, k int, canonical bool, numHashes uint8, numSigs uint64, names []string, indices []uint32, sizes []uint64, datas [][]byte) error {
-	w, err := os.Create(file)
-	if err != nil {
-		return err
-	}
-	defer w.Close()
-
-	outfh := bufio.NewWriter(w)
-	defer outfh.Flush()
-
-	writer, err := NewWriter(outfh, k, canonical, numHashes, numSigs, names, indices, sizes)
-	if err != nil {
-		return err
-	}
-	for _, data := range datas {
-		err = writer.Write(data)
-		if err != nil {
-			return err
-		}
-	}
-	err = writer.Flush()
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func read(file string) (*Reader, [][]byte, error) {
-	r, err := os.Open(file)
-	if err != nil {
-		return nil, nil, err
-	}
-	defer r.Close()
-
-	infh := bufio.NewReader(r)
-
-	reader, err := NewReader(infh)
-	if err != nil {
-		return reader, nil, err
-	}
-
-	fmt.Println(reader.Header)
-
-	datas := make([][]byte, 0, 10)
-	var data []byte
-	for {
-		data, err = reader.Read()
-		if err != nil {
-			if err == io.EOF {
-				break
-			}
-			return nil, nil, err
-		}
-
-		datas = append(datas, data)
-	}
-
-	return reader, datas, nil
-}
diff -pruN 0.18.8-1/iterator.go 0.19.0-1/iterator.go
--- 0.18.8-1/iterator.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/iterator.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,229 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"fmt"
-	"sync"
-
-	"github.com/pkg/errors"
-	"github.com/shenwei356/bio/seq"
-	"github.com/will-rowe/nthash"
-)
-
-// ErrInvalidK means k < 1.
-var ErrInvalidK = fmt.Errorf("unikmer: invalid k-mer size")
-
-// ErrEmptySeq sequence is empty.
-var ErrEmptySeq = fmt.Errorf("unikmer: empty sequence")
-
-// ErrShortSeq means the sequence is shorter than k
-var ErrShortSeq = fmt.Errorf("unikmer: sequence too short")
-
-var poolIterator = &sync.Pool{New: func() interface{} {
-	return &Iterator{}
-}}
-
-// Iterator is a kmer code (k<=32) or hash iterator.
-type Iterator struct {
-	s         *seq.Seq // only used for KmerIterator
-	k         int
-	kUint     uint // uint(k)
-	kP1       int  // k -1
-	kP1Uint   uint // uint(k-1)
-	canonical bool
-	circular  bool
-
-	hash bool
-
-	finished     bool
-	revcomStrand bool
-	idx          int
-
-	// for KmerIterator
-	length    int
-	end, e    int
-	first     bool
-	kmer      []byte
-	codeBase  uint64
-	preCode   uint64
-	preCodeRC uint64
-	codeRC    uint64
-
-	// for HashIterator
-	hasher *nthash.NTHi
-}
-
-// NewHashIterator returns ntHash Iterator.
-func NewHashIterator(s *seq.Seq, k int, canonical bool, circular bool) (*Iterator, error) {
-	if k < 1 {
-		return nil, ErrInvalidK
-	}
-	if len(s.Seq) < k {
-		return nil, ErrShortSeq
-	}
-
-	// iter := &Iterator{s: s, k: k, canonical: canonical, circular: circular}
-	iter := poolIterator.Get().(*Iterator)
-	iter.s = s
-	iter.k = k
-	iter.canonical = canonical
-	iter.circular = circular
-	iter.finished = false
-	iter.revcomStrand = false
-	iter.idx = 0
-
-	iter.hash = true
-	iter.kUint = uint(k)
-	iter.kP1 = k - 1
-	iter.kP1Uint = uint(k - 1)
-
-	var err error
-	var seq2 []byte
-	if circular {
-		seq2 = make([]byte, len(s.Seq), len(s.Seq)+k-1)
-		copy(seq2, s.Seq) // do not edit original sequence
-		seq2 = append(seq2, s.Seq[0:k-1]...)
-	} else {
-		seq2 = s.Seq
-	}
-	iter.hasher, err = nthash.NewHasher(&seq2, uint(k))
-	if err != nil {
-		return nil, err
-	}
-
-	return iter, nil
-}
-
-// NextHash returns next ntHash.
-func (iter *Iterator) NextHash() (code uint64, ok bool) {
-	code, ok = iter.hasher.Next(iter.canonical)
-	if !ok {
-		poolIterator.Put(iter)
-	}
-	iter.idx++
-	return code, ok
-}
-
-// NewKmerIterator returns k-mer code iterator.
-func NewKmerIterator(s *seq.Seq, k int, canonical bool, circular bool) (*Iterator, error) {
-	if k < 1 {
-		return nil, ErrInvalidK
-	}
-	if len(s.Seq) < k {
-		return nil, ErrShortSeq
-	}
-
-	var s2 *seq.Seq
-	if circular {
-		s2 = s.Clone() // do not edit original sequence
-		s2.Seq = append(s2.Seq, s.Seq[0:k-1]...)
-	} else {
-		s2 = s
-	}
-
-	// iter := &Iterator{s: s2, k: k, canonical: canonical, circular: circular}
-	iter := poolIterator.Get().(*Iterator)
-	iter.s = s2
-	iter.k = k
-	iter.canonical = canonical
-	iter.circular = circular
-	iter.finished = false
-	iter.revcomStrand = false
-	iter.idx = 0
-
-	iter.length = len(s2.Seq)
-	iter.end = iter.length - k + 1
-	iter.kUint = uint(k)
-	iter.kP1 = k - 1
-	iter.kP1Uint = uint(k - 1)
-
-	iter.first = true
-
-	return iter, nil
-}
-
-// NextKmer returns next k-mer code.
-func (iter *Iterator) NextKmer() (code uint64, ok bool, err error) {
-	if iter.finished {
-		return 0, false, nil
-	}
-
-	if iter.idx == iter.end {
-		if iter.canonical || iter.revcomStrand {
-			iter.finished = true
-			poolIterator.Put(iter)
-			return 0, false, nil
-		}
-		iter.s.RevComInplace()
-		iter.idx = 0
-		iter.revcomStrand = true
-		iter.first = true
-	}
-
-	iter.e = iter.idx + iter.k
-	iter.kmer = iter.s.Seq[iter.idx:iter.e]
-
-	if !iter.first {
-		iter.codeBase = base2bit[iter.kmer[iter.kP1]]
-		if iter.codeBase == 4 {
-			err = ErrIllegalBase
-		}
-
-		// compute code from previous one
-		code = iter.preCode&((1<<(iter.kP1Uint<<1))-1)<<2 | iter.codeBase
-
-		// compute code of revcomp kmer from previous one
-		iter.codeRC = (iter.codeBase^3)<<(iter.kP1Uint<<1) | (iter.preCodeRC >> 2)
-	} else {
-		code, err = Encode(iter.kmer)
-		iter.codeRC = MustRevComp(code, iter.k)
-		iter.first = false
-	}
-	if err != nil {
-		return 0, false, errors.Wrapf(err, "encode %s", iter.kmer)
-	}
-
-	iter.preCode = code
-	iter.preCodeRC = iter.codeRC
-	iter.idx++
-
-	if iter.canonical && code > iter.codeRC {
-		code = iter.codeRC
-	}
-
-	return code, true, nil
-}
-
-// Next is a wrapter for NextHash and NextKmer.
-func (iter *Iterator) Next() (code uint64, ok bool, err error) {
-	if iter.hash {
-		code, ok = iter.NextHash()
-		return
-	}
-	code, ok, err = iter.NextKmer()
-	return
-}
-
-// Index returns current 0-baesd index.
-func (iter *Iterator) Index() int {
-	return iter.idx - 1
-}
diff -pruN 0.18.8-1/iterator-protein.go 0.19.0-1/iterator-protein.go
--- 0.18.8-1/iterator-protein.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/iterator-protein.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,95 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"sync"
-
-	"github.com/shenwei356/bio/seq"
-	"github.com/zeebo/wyhash"
-)
-
-var poolProteinIterator = &sync.Pool{New: func() interface{} {
-	return &ProteinIterator{}
-}}
-
-// ProteinIterator is a iterator for protein sequence.
-type ProteinIterator struct {
-	s0 *seq.Seq // only used for KmerProteinIterator
-	s  *seq.Seq // amino acid
-
-	k        int
-	finished bool
-	end      int
-	idx      int
-}
-
-// NewProteinIterator returns an iterator for hash of amino acids
-func NewProteinIterator(s *seq.Seq, k int, codonTable int, frame int) (*ProteinIterator, error) {
-	if k < 1 {
-		return nil, ErrInvalidK
-	}
-	if len(s.Seq) < k*3 {
-		return nil, ErrShortSeq
-	}
-
-	// iter := &ProteinIterator{s0: s, k: k}
-	iter := poolProteinIterator.Get().(*ProteinIterator)
-	iter.s0 = s
-	iter.k = k
-	iter.finished = false
-	iter.idx = 0
-
-	var err error
-	if s.Alphabet != seq.Protein {
-		iter.s, err = s.Translate(codonTable, frame, false, false, true, false)
-		if err != nil {
-			return nil, err
-		}
-	} else {
-		iter.s = s
-	}
-	iter.end = len(iter.s.Seq) - k
-
-	return iter, nil
-}
-
-// Next return's a hash
-func (iter *ProteinIterator) Next() (code uint64, ok bool) {
-	if iter.finished {
-		return 0, false
-	}
-
-	if iter.idx > iter.end {
-		iter.finished = true
-		poolProteinIterator.Put(iter)
-		return 0, false
-	}
-
-	code = wyhash.Hash(iter.s.Seq[iter.idx:iter.idx+iter.k], 1)
-	iter.idx++
-	return code, true
-}
-
-// Index returns current 0-baesd index.
-func (iter *ProteinIterator) Index() int {
-	return iter.idx - 1
-}
diff -pruN 0.18.8-1/iterator-protein_test.go 0.19.0-1/iterator-protein_test.go
--- 0.18.8-1/iterator-protein_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/iterator-protein_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,62 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"testing"
-
-	"github.com/shenwei356/bio/seq"
-)
-
-func TestProteinIterator(t *testing.T) {
-	_s := "AAGTTTGAATCATTCAACTATCTAGTTTTCAGAGAACAATGTTCTCTAAAGAATAGAAAAGAGTCATTGTGCGGTGATGATGGCGGGAAGGATCCACCTG"
-	sequence, err := seq.NewSeq(seq.DNA, []byte(_s))
-	if err != nil {
-		t.Errorf("fail to create sequence: %s", _s)
-	}
-	k := 10
-
-	iter, err := NewProteinIterator(sequence, k, 1, 1)
-	if err != nil {
-		t.Errorf("fail to create aa iter rator")
-	}
-
-	var code uint64
-	var ok bool
-	// var idx int
-	codes := make([]uint64, 0, 1024)
-	for {
-		code, ok = iter.Next()
-		if !ok {
-			break
-		}
-
-		// idx = iter.Index()
-		// fmt.Printf("aa: %d-%s, %d\n", idx, iter.s.Seq[idx:idx+k], code)
-
-		codes = append(codes, code)
-	}
-
-	if len(codes) != len(_s)/3-k+1 {
-		t.Errorf("k-mer hashes number error")
-	}
-
-}
diff -pruN 0.18.8-1/iterator_test.go 0.19.0-1/iterator_test.go
--- 0.18.8-1/iterator_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/iterator_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,210 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"math/rand"
-	"testing"
-
-	"github.com/shenwei356/bio/seq"
-	"github.com/shenwei356/util/bytesize"
-)
-
-func TestKmerIterator(t *testing.T) {
-	_s := "AAGTTTGAATCATTCAACTATCTAGTTTTCAGAGAACAATGTTCTCTAAAGAATAGAAAAGAGTCATTGTGCGGTGATGATGGCGGGAAGGATCCACCTG"
-	sequence, err := seq.NewSeq(seq.DNA, []byte(_s))
-	if err != nil {
-		t.Errorf("fail to create sequence: %s", _s)
-	}
-	k := 10
-
-	iter, err := NewKmerIterator(sequence, k, true, false)
-	if err != nil {
-		t.Errorf("fail to create aa iter rator")
-	}
-
-	var code uint64
-	var ok bool
-	// var idx int
-	codes := make([]uint64, 0, 1024)
-	for {
-		code, ok, err = iter.Next()
-		if err != nil {
-			t.Error(err)
-		}
-		if !ok {
-			break
-		}
-
-		// idx = iter.Index()
-		// fmt.Printf("kmer: %d-%s, %d\n", idx, iter.s.Seq[idx:idx+k], code)
-
-		codes = append(codes, code)
-	}
-
-	if len(codes) != len(_s)-k+1 {
-		t.Errorf("k-mers number error")
-	}
-}
-
-func TestHashIterator(t *testing.T) {
-	_s := "AAGTTTGAATCATTCAACTATCTAGTTTTCAGAGAACAATGTTCTCTAAAGAATAGAAAAGAGTCATTGTGCGGTGATGATGGCGGGAAGGATCCACCTG"
-	sequence, err := seq.NewSeq(seq.DNA, []byte(_s))
-	if err != nil {
-		t.Errorf("fail to create sequence: %s", _s)
-	}
-	k := 10
-
-	iter, err := NewHashIterator(sequence, k, true, false)
-	if err != nil {
-		t.Errorf("fail to create aa iter rator")
-	}
-
-	var code uint64
-	var ok bool
-	// var idx int
-	codes := make([]uint64, 0, 1024)
-	for {
-		code, ok, err = iter.Next()
-		if err != nil {
-			t.Error(err)
-		}
-		if !ok {
-			break
-		}
-
-		// idx = iter.Index()
-		// fmt.Printf("kmer: %d-%s, %d\n", idx, iter.s.Seq[idx:idx+k], code)
-
-		codes = append(codes, code)
-	}
-
-	if len(codes) != len(_s)-k+1 {
-		t.Errorf("k-mer hashes number error")
-	}
-}
-
-var benchSeqs []*seq.Seq
-var _code uint64
-
-func init() {
-	rand.Seed(11)
-
-	sizes := []int{1 << 10} //, 1 << 20, 10 << 20}
-	benchSeqs = make([]*seq.Seq, len(sizes))
-	var err error
-	for i, size := range sizes {
-		sequence := make([]byte, size)
-
-		// fmt.Printf("generating pseudo DNA with length of %s ...\n", bytesize.ByteSize(size))
-		for j := 0; j < size; j++ {
-			sequence[j] = bit2base[rand.Intn(4)]
-		}
-		benchSeqs[i], err = seq.NewSeq(seq.DNA, sequence)
-		if err != nil {
-			panic("should not happen")
-		}
-		// fmt.Println(benchSeqs[i])
-	}
-	// fmt.Printf("%d DNA sequences generated\n", len(sizes))
-}
-
-func BenchmarkKmerIterator(b *testing.B) {
-	for i := range benchSeqs {
-		size := len(benchSeqs[i].Seq)
-		b.Run(bytesize.ByteSize(size).String(), func(b *testing.B) {
-			var code uint64
-			var ok bool
-
-			for j := 0; j < b.N; j++ {
-				iter, err := NewKmerIterator(benchSeqs[i], 31, true, false)
-				if err != nil {
-					b.Errorf("fail to create hash iterator. seq length: %d", size)
-				}
-				for {
-					code, ok, err = iter.NextKmer()
-					if err != nil {
-						b.Errorf("fail to get kmer code: %d-%s", iter.Index(),
-							benchSeqs[i].Seq[iter.Index():iter.Index()+31])
-					}
-
-					if !ok {
-						break
-					}
-
-					_code = code
-				}
-			}
-		})
-	}
-}
-
-func BenchmarkHashIterator(b *testing.B) {
-	for i := range benchSeqs {
-		size := len(benchSeqs[i].Seq)
-		b.Run(bytesize.ByteSize(size).String(), func(b *testing.B) {
-			var code uint64
-			var ok bool
-
-			for j := 0; j < b.N; j++ {
-				iter, err := NewHashIterator(benchSeqs[i], 31, true, false)
-				if err != nil {
-					b.Errorf("fail to create hash iterator. seq length: %d", size)
-				}
-
-				for {
-					code, ok = iter.NextHash()
-					if !ok {
-						break
-					}
-
-					_code = code
-				}
-			}
-		})
-	}
-}
-
-func BenchmarkProteinIterator(b *testing.B) {
-	for i := range benchSeqs {
-		size := len(benchSeqs[i].Seq)
-		b.Run(bytesize.ByteSize(size).String(), func(b *testing.B) {
-			var code uint64
-			var ok bool
-
-			for j := 0; j < b.N; j++ {
-				iter, err := NewProteinIterator(benchSeqs[i], 10, 1, 1)
-				if err != nil {
-					b.Errorf("fail to create hash iterator. seq length: %d", size)
-				}
-
-				for {
-					code, ok = iter.Next()
-					if !ok {
-						break
-					}
-
-					_code = code
-				}
-			}
-		})
-	}
-}
diff -pruN 0.18.8-1/kmer.go 0.19.0-1/kmer.go
--- 0.18.8-1/kmer.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/kmer.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,457 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//b
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"bytes"
-	"errors"
-)
-
-// ErrIllegalBase means that base beyond IUPAC symbols are  detected.
-var ErrIllegalBase = errors.New("unikmer: illegal base")
-
-// ErrKOverflow means K > 32.
-var ErrKOverflow = errors.New("unikmer: k-mer size (1-32) overflow")
-
-// ErrCodeOverflow means the encode interger is bigger than 4^k.
-var ErrCodeOverflow = errors.New("unikmer: code value overflow")
-
-// slice is much faster than switch and map.
-var base2bit = [256]uint64{
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 0, 1, 1, 0, 4, 4, 2, 0, 4, 4, 2, 4, 0, 0, 4,
-	4, 4, 0, 1, 3, 3, 0, 0, 4, 1, 4, 4, 4, 4, 4, 4,
-	4, 0, 1, 1, 0, 4, 4, 2, 0, 4, 4, 2, 4, 0, 0, 4,
-	4, 4, 0, 1, 3, 3, 0, 0, 4, 1, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-}
-
-// var base2bit []uint64
-
-// MaxCode is the maxinum interger for all Ks.
-var MaxCode []uint64
-
-func init() {
-	MaxCode = make([]uint64, 33)
-	for i := 1; i <= 32; i++ {
-		MaxCode[i] = 1<<uint(i*2) - 1
-	}
-
-	// base2bit = make([]uint64, 256)
-	// for i := range base2bit {
-	// 	base2bit[i] = 4
-	// }
-	// base2bit['A'] = 0
-	// base2bit['a'] = 0
-	// base2bit['N'] = 0
-	// base2bit['n'] = 0
-	// base2bit['M'] = 0
-	// base2bit['m'] = 0
-	// base2bit['V'] = 0
-	// base2bit['v'] = 0
-	// base2bit['H'] = 0
-	// base2bit['h'] = 0
-	// base2bit['R'] = 0
-	// base2bit['r'] = 0
-	// base2bit['D'] = 0
-	// base2bit['d'] = 0
-	// base2bit['W'] = 0
-	// base2bit['w'] = 0
-
-	// base2bit['C'] = 1
-	// base2bit['c'] = 1
-	// base2bit['S'] = 1
-	// base2bit['s'] = 1
-	// base2bit['B'] = 1
-	// base2bit['b'] = 1
-	// base2bit['Y'] = 1
-	// base2bit['y'] = 1
-
-	// base2bit['G'] = 2
-	// base2bit['g'] = 2
-	// base2bit['K'] = 2
-	// base2bit['k'] = 2
-
-	// base2bit['T'] = 3
-	// base2bit['t'] = 3
-	// base2bit['U'] = 3
-	// base2bit['u'] = 3
-
-	// for i := 0; i < 256; i++ {
-	// 	fmt.Fprintf(os.Stderr, "%d,", base2bit[i])
-	// 	if (i+1)%16 == 0 {
-	// 		fmt.Fprintln(os.Stderr)
-	// 	}
-	// }
-}
-
-// Encode converts byte slice to bits.
-//
-// Codes:
-//
-// 	  A    0b00
-// 	  C    0b01
-// 	  G    0b10
-// 	  T    0b11
-//
-// For degenerate bases, only the first base is kept.
-//
-//     M       AC     A
-//     V       ACG    A
-//     H       ACT    A
-//     R       AG     A
-//     D       AGT    A
-//     W       AT     A
-//     S       CG     C
-//     B       CGT    C
-//     Y       CT     C
-//     K       GT     G
-//     N       ACGT   A
-//
-func Encode(kmer []byte) (code uint64, err error) {
-	if len(kmer) == 0 || len(kmer) > 32 {
-		return 0, ErrKOverflow
-	}
-
-	var v uint64
-	for _, b := range kmer {
-		code <<= 2
-		v = base2bit[b]
-		// if v > 3 {
-		if v == 4 {
-			return code, ErrIllegalBase
-		}
-		code |= v
-	}
-	return code, nil
-}
-
-// ErrNotConsecutiveKmers means the two k-mers are not consecutive.
-var ErrNotConsecutiveKmers = errors.New("unikmer: not consecutive k-mers")
-
-// MustEncodeFromFormerKmer encodes from former the k-mer,
-// assuming the k-mer and leftKmer are both OK.
-func MustEncodeFromFormerKmer(kmer []byte, leftKmer []byte, leftCode uint64) (uint64, error) {
-	v := base2bit[kmer[len(kmer)-1]]
-	// if v > 3 {
-	if v == 4 {
-		return leftCode, ErrIllegalBase
-	}
-	// retrieve (k-1)*2 bits and << 2, and then add v
-	return leftCode&((1<<(uint(len(kmer)-1)<<1))-1)<<2 | v, nil
-}
-
-// EncodeFromFormerKmer encodes from the former k-mer, inspired by ntHash
-func EncodeFromFormerKmer(kmer []byte, leftKmer []byte, leftCode uint64) (uint64, error) {
-	if len(kmer) == 0 {
-		return 0, ErrKOverflow
-	}
-	if len(kmer) != len(leftKmer) {
-		return 0, ErrKMismatch
-	}
-	if !bytes.Equal(kmer[0:len(kmer)-1], leftKmer[1:]) {
-		return 0, ErrNotConsecutiveKmers
-	}
-	return MustEncodeFromFormerKmer(kmer, leftKmer, leftCode)
-}
-
-// MustEncodeFromLatterKmer encodes from the latter k-mer,
-// assuming the k-mer and rightKmer are both OK.
-func MustEncodeFromLatterKmer(kmer []byte, rightKmer []byte, rightCode uint64) (uint64, error) {
-	v := base2bit[kmer[0]]
-	// if v > 3 {
-	if v == 4 {
-		return rightCode, ErrIllegalBase
-	}
-
-	return v<<(uint(len(kmer)-1)<<1) | rightCode>>2, nil
-}
-
-// EncodeFromLatterKmer encodes from the former k-mer.
-func EncodeFromLatterKmer(kmer []byte, rightKmer []byte, rightCode uint64) (uint64, error) {
-	if len(kmer) == 0 {
-		return 0, ErrKOverflow
-	}
-	if len(kmer) != len(rightKmer) {
-		return 0, ErrKMismatch
-	}
-	if !bytes.Equal(rightKmer[0:len(kmer)-1], kmer[1:len(rightKmer)]) {
-		return 0, ErrNotConsecutiveKmers
-	}
-	return MustEncodeFromLatterKmer(kmer, rightKmer, rightCode)
-}
-
-// Reverse returns code of the reversed sequence.
-func Reverse(code uint64, k int) (c uint64) {
-	if k <= 0 || k > 32 {
-		panic(ErrKOverflow)
-	}
-	// for i := 0; i < k; i++ {
-	// 	c = (c << 2) | (code & 3)
-	// 	code >>= 2
-	// }
-	// return
-
-	// https: //www.biostars.org/p/113640, with a little modification
-	c = code
-	c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2)
-	c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4)
-	c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8)
-	c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16)
-	c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32)
-	return (c >> (2 * (32 - k)))
-}
-
-// MustReverse is similar to Reverse, but does not check k.
-func MustReverse(code uint64, k int) (c uint64) {
-	// for i := 0; i < k; i++ {
-	// 	c = (c << 2) | (code & 3)
-	// 	code >>= 2
-	// }
-	// return
-
-	// https: //www.biostars.org/p/113640, with a little modification
-	c = code
-	c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2)
-	c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4)
-	c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8)
-	c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16)
-	c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32)
-	return (c >> (2 * (32 - k)))
-}
-
-// Complement returns code of complement sequence.
-func Complement(code uint64, k int) uint64 {
-	if k <= 0 || k > 32 {
-		panic(ErrKOverflow)
-	}
-	return code ^ (1<<uint(k<<1) - 1)
-}
-
-// MustComplement is similar to Complement, but does not check k.
-func MustComplement(code uint64, k int) uint64 {
-	return code ^ (1<<uint(k<<1) - 1)
-}
-
-// RevComp returns code of reverse complement sequence.
-func RevComp(code uint64, k int) (c uint64) {
-	if k <= 0 || k > 32 {
-		panic(ErrKOverflow)
-	}
-	// for i := 0; i < k; i++ {
-	// 	c = (c << 2) | (code&3 ^ 3)
-	// 	code >>= 2
-	// }
-	// return
-
-	// https://www.biostars.org/p/113640/#9474334
-	c = ^code
-	c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2)
-	c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4)
-	c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8)
-	c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16)
-	c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32)
-	return (c >> (2 * (32 - k)))
-}
-
-// MustRevComp is similar to RevComp, but does not check k.
-func MustRevComp(code uint64, k int) (c uint64) {
-	// for i := 0; i < k; i++ {
-	// 	c = (c << 2) | (code&3 ^ 3)
-	// 	code >>= 2
-	// }
-	// return
-
-	// https://www.biostars.org/p/113640/#9474334
-	c = ^code
-	c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2)
-	c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4)
-	c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8)
-	c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16)
-	c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32)
-	return (c >> (2 * (32 - k)))
-}
-
-// Canonical returns code of its canonical kmer.
-func Canonical(code uint64, k int) uint64 {
-	if k <= 0 || k > 32 {
-		panic(ErrKOverflow)
-	}
-
-	var rc uint64
-	// c := code
-	// for i := 0; i < k; i++ {
-	// 	rc = (rc << 2) | (c&3 ^ 3)
-	// 	c >>= 2
-	// }
-
-	// https://www.biostars.org/p/113640/#9474334
-	c := ^code
-	c = ((c >> 2 & 0x3333333333333333) | (c&0x3333333333333333)<<2)
-	c = ((c >> 4 & 0x0F0F0F0F0F0F0F0F) | (c&0x0F0F0F0F0F0F0F0F)<<4)
-	c = ((c >> 8 & 0x00FF00FF00FF00FF) | (c&0x00FF00FF00FF00FF)<<8)
-	c = ((c >> 16 & 0x0000FFFF0000FFFF) | (c&0x0000FFFF0000FFFF)<<16)
-	c = ((c >> 32 & 0x00000000FFFFFFFF) | (c&0x00000000FFFFFFFF)<<32)
-	rc = (c >> (2 * (32 - k)))
-
-	if rc < code {
-		return rc
-	}
-	return code
-}
-
-// MustCanonical is similar to Canonical, but does not check k.
-func MustCanonical(code uint64, k int) uint64 {
-	var rc uint64
-	c := code
-	for i := 0; i < k; i++ {
-		rc = (rc << 2) | (c&3 ^ 3)
-		c >>= 2
-	}
-	if rc < code {
-		return rc
-	}
-	return code
-}
-
-// bit2base is for mapping bit to base.
-var bit2base = [4]byte{'A', 'C', 'G', 'T'}
-
-// bit2str is for output bits string
-var bit2str = [4]string{"00", "01", "10", "11"}
-
-// Decode converts the code to original seq
-func Decode(code uint64, k int) []byte {
-	if k <= 0 || k > 32 {
-		panic(ErrKOverflow)
-	}
-	if code > MaxCode[k] {
-		panic(ErrCodeOverflow)
-	}
-	kmer := make([]byte, k)
-	for i := 0; i < k; i++ {
-		kmer[k-1-i] = bit2base[code&3]
-		code >>= 2
-	}
-	return kmer
-}
-
-// MustDecode is similar to Decode, but does not check k and code.
-func MustDecode(code uint64, k int) []byte {
-	kmer := make([]byte, k)
-	for i := 0; i < k; i++ {
-		kmer[k-1-i] = bit2base[code&3]
-		code >>= 2
-	}
-	return kmer
-}
-
-// KmerCode is a struct representing a k-mer in 64-bits.
-type KmerCode struct {
-	Code uint64
-	K    int
-}
-
-// NewKmerCode returns a new KmerCode struct from byte slice.
-func NewKmerCode(kmer []byte) (KmerCode, error) {
-	code, err := Encode(kmer)
-	if err != nil {
-		return KmerCode{}, err
-	}
-	return KmerCode{code, len(kmer)}, err
-}
-
-// NewKmerCodeFromFormerOne computes KmerCode from the Former consecutive k-mer.
-func NewKmerCodeFromFormerOne(kmer []byte, leftKmer []byte, preKcode KmerCode) (KmerCode, error) {
-	code, err := EncodeFromFormerKmer(kmer, leftKmer, preKcode.Code)
-	if err != nil {
-		return KmerCode{}, err
-	}
-	return KmerCode{code, len(kmer)}, err
-}
-
-// NewKmerCodeMustFromFormerOne computes KmerCode from the Former consecutive k-mer,
-// assuming the k-mer and leftKmer are both OK.
-func NewKmerCodeMustFromFormerOne(kmer []byte, leftKmer []byte, preKcode KmerCode) (KmerCode, error) {
-	code, err := MustEncodeFromFormerKmer(kmer, leftKmer, preKcode.Code)
-	if err != nil {
-		return KmerCode{}, err
-	}
-	return KmerCode{code, len(kmer)}, err
-}
-
-// Equal checks wether two KmerCodes are the same.
-func (kcode KmerCode) Equal(kcode2 KmerCode) bool {
-	return kcode.K == kcode2.K && kcode.Code == kcode2.Code
-}
-
-// Rev returns KmerCode of the reverse sequence.
-func (kcode KmerCode) Rev() KmerCode {
-	return KmerCode{MustReverse(kcode.Code, kcode.K), kcode.K}
-}
-
-// Comp returns KmerCode of the complement sequence.
-func (kcode KmerCode) Comp() KmerCode {
-	return KmerCode{MustComplement(kcode.Code, kcode.K), kcode.K}
-}
-
-// RevComp returns KmerCode of the reverse complement sequence.
-func (kcode KmerCode) RevComp() KmerCode {
-	return KmerCode{MustRevComp(kcode.Code, kcode.K), kcode.K}
-}
-
-// Canonical returns its canonical kmer
-func (kcode KmerCode) Canonical() KmerCode {
-	rcKcode := kcode.RevComp()
-	if rcKcode.Code < kcode.Code {
-		return rcKcode
-	}
-	return kcode
-}
-
-// Bytes returns k-mer in []byte.
-func (kcode KmerCode) Bytes() []byte {
-	return Decode(kcode.Code, kcode.K)
-}
-
-// String returns k-mer in string
-func (kcode KmerCode) String() string {
-	return string(Decode(kcode.Code, kcode.K))
-}
-
-// BitsString returns code to string
-func (kcode KmerCode) BitsString() string {
-	var buf bytes.Buffer
-	for _, b := range Decode(kcode.Code, kcode.K) {
-		buf.WriteString(bit2str[base2bit[b]])
-	}
-	return buf.String()
-}
diff -pruN 0.18.8-1/kmer-sort.go 0.19.0-1/kmer-sort.go
--- 0.18.8-1/kmer-sort.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/kmer-sort.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,87 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-// KmerCodeSlice is a slice of KmerCode, for sorting
-type KmerCodeSlice []KmerCode
-
-// Len return length of the slice
-func (codes KmerCodeSlice) Len() int {
-	return len(codes)
-}
-
-// Swap swaps two elements
-func (codes KmerCodeSlice) Swap(i, j int) {
-	codes[i], codes[j] = codes[j], codes[i]
-}
-
-// Less simply compare two KmerCode
-func (codes KmerCodeSlice) Less(i, j int) bool {
-	return codes[i].Code < codes[j].Code
-}
-
-// func splitKmer(code uint64, k int) (uint64, uint64, uint64, uint64) {
-// 	// -====, k = 4:  ---, -, =, ===
-// 	return code >> 2, code & 3, code >> (uint(k-1) << 1) & 3, code & ((1 << (uint(k-1) << 1)) - 1)
-// }
-
-// CodeSlice is a slice of Kmer code (uint64), for sorting
-type CodeSlice []uint64
-
-// Len return length of the slice
-func (codes CodeSlice) Len() int {
-	return len(codes)
-}
-
-// Swap swaps two elements
-func (codes CodeSlice) Swap(i, j int) {
-	codes[i], codes[j] = codes[j], codes[i]
-}
-
-// Less simply compare two KmerCode
-func (codes CodeSlice) Less(i, j int) bool {
-	return codes[i] < codes[j]
-}
-
-// CodeTaxid is the code-taxid pair
-type CodeTaxid struct {
-	Code uint64
-	// _     uint32 // needed? to test
-	Taxid uint32
-}
-
-// CodeTaxidSlice is a list of CodeTaxid, just for sorting
-type CodeTaxidSlice []CodeTaxid
-
-// Len return length of the slice
-func (pairs CodeTaxidSlice) Len() int {
-	return len(pairs)
-}
-
-// Swap swaps two elements
-func (pairs CodeTaxidSlice) Swap(i, j int) {
-	pairs[i], pairs[j] = pairs[j], pairs[i]
-}
-
-// Less simply compare two KmerCode
-func (pairs CodeTaxidSlice) Less(i, j int) bool {
-	return pairs[i].Code < pairs[j].Code
-}
diff -pruN 0.18.8-1/kmer_test.go 0.19.0-1/kmer_test.go
--- 0.18.8-1/kmer_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/kmer_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,282 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"bytes"
-	"fmt"
-	"math/rand"
-	"testing"
-)
-
-var randomMers [][]byte
-var randomMersN = 100000
-
-var benchMer = []byte("ACTGactgGTCAgtcaactgGTCAACTGGTCA")
-var codeBenchMer uint64 = 2170370756141391540
-var benchMer2 = []byte("CTGactgGTCAgtcaactgGTCAACTGGTCAC")
-var codeBenchMer2 uint64 = 8681483024565566161
-var benchCode uint64
-var benchKmerCode KmerCode
-
-func init() {
-	randomMers = make([][]byte, randomMersN)
-	for i := 0; i < randomMersN; i++ {
-		randomMers[i] = make([]byte, rand.Intn(32)+1)
-		for j := range randomMers[i] {
-			randomMers[i][j] = bit2base[rand.Intn(4)]
-		}
-	}
-
-	// for benchmark
-	var err error
-	benchCode, err = Encode(benchMer)
-	if err != nil {
-		panic(fmt.Sprintf("init: fail to encode %s", benchMer))
-	}
-
-	benchKmerCode, err = NewKmerCode(benchMer)
-	if err != nil {
-		panic(fmt.Sprintf("init: fail to create KmerCode from %s", benchMer))
-	}
-}
-
-// TestEncodeDecode tests encode and decode
-func TestEncodeDecode(t *testing.T) {
-	var kcode KmerCode
-	var err error
-	for _, mer := range randomMers {
-		kcode, err = NewKmerCode(mer) // encode
-		if err != nil {
-			t.Errorf("Encode error: %s", mer)
-		}
-
-		if !bytes.Equal(mer, kcode.Bytes()) { // decode
-			t.Errorf("Decode error: %s != %s ", mer, kcode.Bytes())
-		}
-	}
-}
-
-// TestEncodeFromFormerKmer tests TestEncodeFromFormerKmer
-func TestEncodeFromFormerKmer(t *testing.T) {
-	var err error
-	k := 5
-	first := true
-	var code, code0, pCode uint64
-	var kmer, pKmer []byte
-	for i := 0; i < len(benchMer)-k; i++ {
-		kmer = benchMer[i : i+k]
-		if first {
-			code, err = Encode(kmer)
-			if err != nil {
-				t.Errorf("Encode error: %s", kmer)
-			}
-
-			pCode = code
-			first = false
-			continue
-		}
-		pKmer = benchMer[i-1 : i+k-1]
-		code, err = EncodeFromFormerKmer(kmer, pKmer, pCode)
-		if err != nil {
-			t.Errorf("Encode error: %s", kmer)
-		}
-
-		code0, err = Encode(kmer)
-		if err != nil {
-			t.Errorf("Encode error: %s", kmer)
-		}
-		if code0 != code {
-			t.Errorf("EncodeFromFormerKmer error for %s: wrong %d != right %d", kmer, code, code0)
-		}
-
-		pCode = code
-	}
-}
-
-func TestEncodeFromLatterKmer(t *testing.T) {
-	var err error
-	k := 5
-	first := true
-	var code, code0, pCode uint64
-	var kmer, pKmer []byte
-	for i := len(benchMer) - k - 1; i >= 0; i-- {
-		kmer = benchMer[i : i+k]
-		if first {
-			code, err = Encode(kmer)
-			if err != nil {
-				t.Errorf("Encode error: %s", kmer)
-			}
-
-			pCode = code
-			first = false
-			continue
-		}
-		pKmer = benchMer[i+1 : i+k+1]
-		code, err = EncodeFromLatterKmer(kmer, pKmer, pCode)
-		if err != nil {
-			t.Errorf("Encode error: %s", kmer)
-		}
-
-		code0, err = Encode(kmer)
-		if err != nil {
-			t.Errorf("Encode error: %s", kmer)
-		}
-		if code0 != code {
-			t.Errorf("EncodeFromLatterKmer error for %s: wrong %d != right %d", kmer, code, code0)
-		}
-
-		pCode = code
-	}
-}
-
-// TestRevComp tests revcomp
-func TestRevComp(t *testing.T) {
-	var kcode KmerCode
-	for _, mer := range randomMers {
-		kcode, _ = NewKmerCode(mer)
-
-		// fmt.Printf("%s, rev:%s\n", kcode, kcode.Rev())
-
-	}
-
-	for _, mer := range randomMers {
-		kcode, _ = NewKmerCode(mer)
-
-		if !kcode.Rev().Rev().Equal(kcode) {
-			t.Errorf("Rev() error: %s, Rev(): %s", kcode, kcode.Rev())
-		}
-
-		if !kcode.Comp().Comp().Equal(kcode) {
-			t.Errorf("Comp() error: %s, Comp(): %s", kcode, kcode.Comp())
-		}
-
-		if !kcode.Comp().Rev().Equal(kcode.RevComp()) {
-			t.Errorf("Rev().Comp() error: %s, Rev(): %s, Comp(): %s, RevComp: %s", kcode, kcode.Rev(), kcode.Comp(), kcode.RevComp())
-		}
-	}
-}
-
-var result uint64
-
-// BenchmarkEncode tests speed of Encode()
-func BenchmarkEncodeK32(b *testing.B) {
-	var code uint64
-	var err error
-	for i := 0; i < b.N; i++ {
-		code, err = Encode(benchMer)
-		if err != nil {
-			b.Errorf("Encode error: %s", benchMer)
-		}
-		if code != codeBenchMer {
-			b.Errorf("wrong result: %s", benchMer)
-		}
-	}
-	result = code
-}
-
-// BenchmarkEncode tests speed of EncodeFromFormerKmer
-func BenchmarkEncodeFromFormerKmerK32(b *testing.B) {
-	var code uint64
-	var err error
-	for i := 0; i < b.N; i++ {
-		code, err = EncodeFromFormerKmer(benchMer2, benchMer, benchCode)
-		if err != nil {
-			b.Errorf("Encode error: %s", benchMer)
-		}
-		if code != codeBenchMer2 {
-			b.Errorf("wrong result: %s", benchMer)
-		}
-	}
-	result = code
-}
-
-// BenchmarkEncode tests speed of MustEncodeFromFormerKmer
-func BenchmarkMustEncodeFromFormerKmerK32(b *testing.B) {
-	var code uint64
-	var err error
-	for i := 0; i < b.N; i++ {
-		code, err = MustEncodeFromFormerKmer(benchMer2, benchMer, benchCode)
-		if err != nil {
-			b.Errorf("Encode error: %s", benchMer)
-		}
-		if code != codeBenchMer2 {
-			b.Errorf("wrong result: %s", benchMer)
-		}
-	}
-	result = code
-}
-
-var result2 []byte
-
-// BenchmarkDecode tests speed of decode
-func BenchmarkDecodeK32(b *testing.B) {
-	var r []byte
-	for i := 0; i < b.N; i++ {
-		r = Decode(benchCode, len(benchMer))
-	}
-	result2 = r
-}
-
-func BenchmarkMustDecodeK32(b *testing.B) {
-	var r []byte
-	for i := 0; i < b.N; i++ {
-		r = MustDecode(benchCode, len(benchMer))
-	}
-	result2 = r
-}
-
-var result3 KmerCode
-
-// BenchmarkRevK32 tests speed of rev
-func BenchmarkRevK32(b *testing.B) {
-	var r KmerCode
-	for i := 0; i < b.N; i++ {
-		r = benchKmerCode.Rev()
-	}
-	result3 = r
-}
-
-// BenchmarkRevK32 tests speed of comp
-func BenchmarkCompK32(b *testing.B) {
-	var r KmerCode
-	for i := 0; i < b.N; i++ {
-		r = benchKmerCode.Comp()
-	}
-	result3 = r
-}
-
-// BenchmarkRevCompK32 tests speed of revcomp
-func BenchmarkRevCompK32(b *testing.B) {
-	var r KmerCode
-	for i := 0; i < b.N; i++ {
-		r = benchKmerCode.RevComp()
-	}
-	result3 = r
-}
-
-func BenchmarkCannonalK32(b *testing.B) {
-	var r KmerCode
-	for i := 0; i < b.N; i++ {
-		r = benchKmerCode.Canonical()
-	}
-	result3 = r
-}
diff -pruN 0.18.8-1/mkdocs.yml 0.19.0-1/mkdocs.yml
--- 0.18.8-1/mkdocs.yml	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/mkdocs.yml	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,22 @@
+site_name: unikmer - Toolkit for k-mer with taxonomic information
+site_url: https://bioinf.shenwei.me/unikmer
+nav:
+- Home: index.md
+- Download: download.md
+- Usage: usage.md
+- More tools: https://github.com/shenwei356
+
+theme:
+    name: material
+    palette:
+        primary: teal
+        accent: blue grey
+    feature:
+        navigation.tabs: false
+extra:
+  manifest: manifest.webmanifest
+
+repo_url: https://github.com/shenwei356/unikmer
+site_description: unikmer - Toolkit for k-mer with taxonomic information
+site_author: Wei Shen
+# google_analytics: ['UA-28948626-9', 'bioinf.shenwei.me/unikmer']
diff -pruN 0.18.8-1/README.md 0.19.0-1/README.md
--- 0.18.8-1/README.md	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/README.md	2022-04-25 12:44:35.000000000 +0000
@@ -1,6 +1,7 @@
-# unikmer
+# unikmer: Toolkit for k-mer with taxonomic information
 
-`unikmer` is a golang package and a toolkit for nucleic acid [k-mer]((https://en.wikipedia.org/wiki/K-mer)) analysis, providing functions
+`unikmer` is a toolkit for nucleic acid [k-mer](https://en.wikipedia.org/wiki/K-mer) analysis, 
+providing functions
 including set operation k-mers (sketch) optional with
 TaxIds but without count information.
 
@@ -16,81 +17,32 @@ repeated k-mers.
 <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
 ## Table of Contents
 
-- [unikmer](#unikmer)
-  - [Table of Contents](#table-of-contents)
-  - [The package](#the-package)
-    - [Installation](#installation)
-    - [Benchmark](#benchmark)
-  - [The toolkit](#the-toolkit)
-    - [Installation](#installation-1)
-    - [Commands](#commands)
-    - [Binary file (.unik)](#binary-file-unik)
-      - [Compression rate comparison](#compression-rate-comparison)
-    - [Quick Start](#quick-start)
-  - [Contributing](#contributing)
-  - [License](#license)
+- [Using cases](#using-cases)
+- [Installation](#installation)
+- [Commands](#commands)
+- [Binary file](#binary-file)
+- [Quick start](#quick-start)
+- [Support](#support)
+- [License](#license)
 
 <!-- END doctoc generated TOC please keep comment here to allow auto update -->
 
-## The package
+## Using cases
 
-[![GoDoc](https://godoc.org/github.com/shenwei356/unikmer?status.svg)](https://godoc.org/github.com/shenwei356/unikmer)
-[![Go Report Card](https://goreportcard.com/badge/github.com/shenwei356/unikmer)](https://goreportcard.com/report/github.com/shenwei356/unikmer)
+- Finding conserved regions in all genomes of a species.
+- Finding species/strain-specific sequences for designing probes/primers.
 
-The unikmer package provides basic manipulations of K-mers (sketch)
-optional with TaxIds but without frequency information,
-and also provides serialization methods.
+## Installation
 
-### Installation
+1. Downloading [executable binary files](https://github.com/shenwei356/unikmer/releases).
 
-    go get -u github.com/shenwei356/unikmer
+1. Via Bioconda [![Anaconda Cloud](https://anaconda.org/bioconda/unikmer/badges/version.svg)](https://anaconda.org/bioconda/unikmer) [![downloads](https://anaconda.org/bioconda/unikmer/badges/downloads.svg)](https://anaconda.org/bioconda/unikmer)
 
-### Benchmark
+        conda install -c bioconda unikmer
 
-CPU: AMD Ryzen 7 2700X Eight-Core Processor, 3.7 GHz
+## Commands
 
-    $ go test . -bench=Bench* -benchmem \
-        | grep Bench \
-        | perl -pe 's/\s\s+/\t/g' \
-        | csvtk cut -Ht -f 1,3-5 \
-        | csvtk add-header -t -n test,time,memory,allocs \
-        | csvtk pretty -t -r
-    
-                                          test           time      memory        allocs
-    ------------------------------------------   ------------   ---------   -----------
-                         BenchmarkEncodeK32-16    18.66 ns/op      0 B/op   0 allocs/op
-           BenchmarkEncodeFromFormerKmerK32-16    8.030 ns/op      0 B/op   0 allocs/op
-       BenchmarkMustEncodeFromFormerKmerK32-16    1.702 ns/op      0 B/op   0 allocs/op
-                         BenchmarkDecodeK32-16    78.95 ns/op     32 B/op   1 allocs/op
-                     BenchmarkMustDecodeK32-16    76.86 ns/op     32 B/op   1 allocs/op
-                            BenchmarkRevK32-16    3.639 ns/op      0 B/op   0 allocs/op
-                           BenchmarkCompK32-16   0.7971 ns/op      0 B/op   0 allocs/op
-                        BenchmarkRevCompK32-16    3.831 ns/op      0 B/op   0 allocs/op
-                       BenchmarkCannonalK32-16    4.210 ns/op      0 B/op   0 allocs/op
-
-              BenchmarkKmerIterator/1.00_KB-16    12625 ns/op    160 B/op   1 allocs/op
-              BenchmarkHashIterator/1.00_KB-16     8118 ns/op    232 B/op   3 allocs/op
-           BenchmarkProteinIterator/1.00_KB-16    14324 ns/op    480 B/op   3 allocs/op
-
-           BenchmarkMinimizerSketch/1.00_KB-16    62497 ns/op    688 B/op   6 allocs/op
-             BenchmarkSyncmerSketch/1.00_KB-16    99390 ns/op   1456 B/op   8 allocs/op
-    BenchmarkProteinMinimizerSketch/1.00_KB-16    24888 ns/op    728 B/op   5 allocs/op
-
-## The toolkit
-
-### Installation
-
-1. Downloading [executable binary files](https://github.com/shenwei356/unikmer/releases) (Latest version).
-
-1. Via Bioconda (not available now)
-
-        conda install unikmer
-
-1. Via Homebrew (not lastest version)
-
-        brew install brewsci/bio/unikmer
-
-### Commands
+[Usages](https://bioinf.shenwei.me/unikmer/usage)
 
 1. Counting
 
@@ -98,34 +50,39 @@ CPU: AMD Ryzen 7 2700X Eight-Core Proces
 
 1. Information
 
-        stats           Statistics of binary files
+        info            Information of binary files
         num             Quickly inspect number of k-mers in binary files
 
 1. Format conversion
 
+        view            Read and output binary format to plain text
+        dump            Convert plain k-mer text to binary format
+
         encode          Encode plain k-mer text to integer
         decode          Decode encoded integer to k-mer text
         
-        view            Read and output binary format to plain text
-        dump            Convert plain k-mer text to binary format
 
 1. Set operations
 
-        head            Extract the first N k-mers
         concat          Concatenate multiple binary files without removing duplicates
         inter           Intersection of multiple binary files
         common          Find k-mers shared by most of multiple binary files
         union           Union of multiple binary files
         diff            Set difference of multiple binary files
-        grep            Search k-mers from binary files
+
+1. Split and merge
 
         sort            Sort k-mers in binary files to reduce file size
         split           Split k-mers into sorted chunk files
         tsplit          Split k-mers according to TaxId
         merge           Merge k-mers from sorted chunk files
 
+1. Subset
+
+        head            Extract the first N k-mers
         sample          Sample k-mers from binary files
-        filter          Filter low-complexity k-mers
+        grep            Search k-mers from binary files
+        filter          Filter out low-complexity k-mers
         rfilter         Filter k-mers by taxonomic rank
 
 1. Searching on genomes
@@ -135,11 +92,12 @@ CPU: AMD Ryzen 7 2700X Eight-Core Proces
 
 1. Misc
 
-        genautocomplete Generate shell autocompletion script
-        help            Help about any command
+        autocompletion  Generate shell autocompletion script
         version         Print version information and check for update
 
-### Binary file (.unik)
+## Binary file
+
+[![Go Reference](https://pkg.go.dev/badge/github.com/shenwei356/unik.svg)](https://pkg.go.dev/github.com/shenwei356/unik)
 
 K-mers (represented in `uint64` in RAM ) are serialized in 8-Byte
 (or less Bytes for shorter k-mers in compact format,
@@ -147,7 +105,7 @@ or much less Bytes for sorted k-mers) ar
 optionally compressed in gzip format with extension of `.unik`.
 TaxIds are optionally stored next to k-mers with 4 or less bytes.
 
-#### Compression rate comparison
+### Compression ratio comparison
 
 No TaxIds stored in this test.
 
@@ -174,7 +132,7 @@ label           |encoded-kmer<sup>a</sup
 - In all test, flag `--canonical` is ON when running `unikmer count`.
 
 
-### Quick Start
+## Quick Start
 
 
     # memusg is for compute time and RAM usage: https://github.com/shenwei356/memusg
@@ -262,7 +220,7 @@ label           |encoded-kmer<sup>a</sup
     4c038832209278840d4d75944b29219c  -
     
     
-    # duplicated k-mers
+    # duplicate k-mers
     $ memusg -t unikmer sort *.k23.sorted.unik -o dup.k23 -d -m 1M
     elapsed time: 1.143s
     peak rss: 240.18 MB
@@ -323,18 +281,20 @@ label           |encoded-kmer<sup>a</sup
     bwa index $g; samtools faidx $g
 
     ncpu=12
-    ls $f.fa.gz | rush -j 1 -v ref=$g -v j=$ncpu \
-    ' bwa aln -o 0 -l 17 -k 0 -t {j} {ref} {} \
-        | bwa samse {ref} - {} \
-        | samtools view -bS > {}.bam; \
-        samtools sort -T {}.tmp -@ {j} {}.bam -o {}.sorted.bam; \
-        samtools index {}.sorted.bam; \
-        samtools flagstat {}.sorted.bam > {}.sorted.bam.flagstat; \
-        /bin/rm {}.bam '  
+    ls $f.fa.gz \
+        | rush -j 1 -v ref=$g -v j=$ncpu \
+            'bwa aln -o 0 -l 17 -k 0 -t {j} {ref} {} \
+                | bwa samse {ref} - {} \
+                | samtools view -bS > {}.bam; \
+             samtools sort -T {}.tmp -@ {j} {}.bam -o {}.sorted.bam; \
+             samtools index {}.sorted.bam; \
+             samtools flagstat {}.sorted.bam > {}.sorted.bam.flagstat; \
+             /bin/rm {}.bam '  
 
-## Contributing
+## Support
 
-We welcome pull requests, bug fixes and issue reports.
+Please [open an issue](https://github.com/shenwei356/unikmer/issues) to report bugs,
+propose new functions or ask for help.
 
 ## License
 
diff -pruN 0.18.8-1/serialization.go 0.19.0-1/serialization.go
--- 0.18.8-1/serialization.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/serialization.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,840 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"io"
-)
-
-// MainVersion is the main version number.
-const MainVersion uint8 = 5
-
-// MinorVersion is the minor version number.
-const MinorVersion uint8 = 0
-
-// Magic number of binary file.
-var Magic = [8]byte{'.', 'u', 'n', 'i', 'k', 'm', 'e', 'r'}
-
-// ErrInvalidFileFormat means invalid file format.
-var ErrInvalidFileFormat = errors.New("unikmer: invalid binary format")
-
-// ErrBrokenFile means the file is not complete.
-var ErrBrokenFile = errors.New("unikmer: broken file")
-
-// ErrKMismatch means K size mismatch.
-var ErrKMismatch = errors.New("unikmer: K mismatch")
-
-// ErrDescTooLong means length of description two long
-var ErrDescTooLong = errors.New("unikmer: description too long, 128 bytes at most")
-
-// ErrCallOrder means WriteTaxid/ReadTaxid should be called after WriteCode/ReadCode
-var ErrCallOrder = errors.New("unikmer: WriteTaxid/ReadTaxid should be called after WriteCode/ReadCode")
-
-// ErrCallLate means SetMaxTaxid/SetGlobalTaxid should be called before writing KmerCode/code/taxid
-var ErrCallLate = errors.New("unikmer: SetMaxTaxid/SetGlobalTaxid should be called before writing KmerCode/code/taxid")
-
-// ErrCallReadWriteTaxid means flag UnikIncludeTaxID is off, but you call ReadTaxid/WriteTaxid
-var ErrCallReadWriteTaxid = errors.New("unikmer: can not call ReadTaxid/WriteTaxid when flag UnikIncludeTaxID is off")
-
-// ErrInvalidTaxid means zero given for a taxid.
-var ErrInvalidTaxid = errors.New("unikmer: invalid taxid, 0 not allowed")
-
-// ErrVersionMismatch means version mismatch between files and program
-var ErrVersionMismatch = errors.New("unikmer: version mismatch")
-
-var be = binary.BigEndian
-
-var descMaxLen = 1024
-var conservedDataLen = 64
-
-// Header contains metadata
-type Header struct {
-	MainVersion  uint8
-	MinorVersion uint8
-	K            int
-	Flag         uint32
-	Number       uint64 // Number of Kmers, may not be accurate
-	globalTaxid  uint32 // universal taxid, 0 for no record
-	maxTaxid     uint32
-	Description  []byte // let's limit it to 128 Bytes
-	Scale        uint32 // scale of down-sampling
-	MaxHash      uint64 // max hash for scaling/down-sampling
-}
-
-const (
-	// UnikCompact means k-mers are serialized in fix-length (n = int((K + 3) / 4) ) of byte array.
-	UnikCompact = 1 << iota
-	// UnikCanonical means only canonical k-mers kept.
-	UnikCanonical
-	// UnikSorted means k-mers are sorted
-	UnikSorted // when sorted, the serialization structure is very different
-	// UnikIncludeTaxID means a k-mer is followed its LCA taxid
-	UnikIncludeTaxID
-
-	// UnikHashed means ntHash value are saved as code.
-	UnikHashed
-	// UnikScaled means only hashes smaller than or equal to max_hash are saved.
-	UnikScaled
-)
-
-func (h Header) String() string {
-	return fmt.Sprintf("unikmer binary k-mer data file v%d.%d with K=%d and Flag=%d",
-		h.MainVersion, h.MinorVersion, h.K, h.Flag)
-}
-
-// Reader is for reading KmerCode.
-type Reader struct {
-	Header
-	r io.Reader
-
-	buf []byte
-
-	compact bool // saving KmerCode in variable-length byte array.
-	bufsize int
-
-	sorted  bool
-	hasPrev bool
-	prev    uint64
-	buf2    []byte
-	offset  uint64
-
-	includeTaxid  bool
-	bufTaxid      []byte
-	taxidByteLen  int
-	prevTaxid     uint32 // buffered taxid
-	hasPrevTaxid  bool
-	justReadACode bool
-	lastRecord    bool
-
-	hashValue bool
-}
-
-// NewReader returns a Reader.
-func NewReader(r io.Reader) (reader *Reader, err error) {
-	reader = &Reader{r: r}
-	err = reader.readHeader()
-	if err != nil {
-		return nil, err
-	}
-	return reader, nil
-}
-
-// IsSorted tells if the k-mers in file sorted
-func (reader *Reader) IsSorted() bool {
-	return reader.Flag&UnikSorted > 0
-}
-
-// IsCanonical tells if the only canonical k-mers stored
-func (reader *Reader) IsCanonical() bool {
-	return reader.Flag&UnikCanonical > 0
-}
-
-// IsCompact tells if the k-mers are stored in a compact format
-func (reader *Reader) IsCompact() bool {
-	return reader.Flag&UnikCompact > 0
-}
-
-// IsIncludeTaxid tells if every k-mer is followed by its taxid
-func (reader *Reader) IsIncludeTaxid() bool {
-	return reader.Flag&UnikIncludeTaxID > 0
-}
-
-// IsHashed tells if ntHash values are saved.
-func (reader *Reader) IsHashed() bool {
-	return reader.Flag&UnikHashed > 0
-}
-
-// IsScaled tells if hashes is scaled
-func (reader *Reader) IsScaled() bool {
-	return reader.Flag&UnikHashed > 0 && reader.Flag&UnikScaled > 0
-}
-
-// HasGlobalTaxid means the file has a global taxid
-func (reader *Reader) HasGlobalTaxid() bool {
-	return reader.globalTaxid > 0
-}
-
-// HasTaxidInfo means the binary file contains global taxid or taxids for all k-mers
-func (reader *Reader) HasTaxidInfo() bool {
-	return reader.IsIncludeTaxid() || reader.HasGlobalTaxid()
-}
-
-// GetGlobalTaxid returns the global taxid
-func (reader *Reader) GetGlobalTaxid() uint32 {
-	return reader.globalTaxid
-}
-
-// GetTaxidBytesLength returns number of byte to store a taxid
-func (reader *Reader) GetTaxidBytesLength() int {
-	return reader.taxidByteLen
-}
-
-// GetScale returns the scale of down-sampling
-func (reader *Reader) GetScale() uint32 {
-	if reader.Scale == 0 {
-		return uint32(1)
-	}
-	return reader.Scale
-}
-
-// GetMaxHash returns the max hash for scaling.
-func (reader *Reader) GetMaxHash() uint64 {
-	if reader.MaxHash == 0 {
-		return ^uint64(0)
-	}
-	return reader.MaxHash
-}
-
-func (reader *Reader) readHeader() (err error) {
-	buf := make([]byte, 56)
-	r := reader.r
-
-	// check Magic number
-	_, err = io.ReadFull(r, buf[:8])
-	if err != nil {
-		return err
-	}
-	same := true
-	for i := 0; i < 8; i++ {
-		if Magic[i] != buf[i] {
-			same = false
-			break
-		}
-	}
-	if !same {
-		return ErrInvalidFileFormat
-	}
-
-	// read metadata
-	_, err = io.ReadFull(r, buf[:4])
-	if err != nil {
-		return err
-	}
-	// check compatibility？
-	if (buf[0] == 0 && buf[1] == 0) ||
-		MainVersion != buf[0] {
-		return ErrVersionMismatch
-	}
-	reader.MainVersion = buf[0]
-	reader.MinorVersion = buf[1]
-
-	reader.K = int(buf[2])
-
-	_, err = io.ReadFull(r, buf[:4])
-	if err != nil {
-		return err
-	}
-	reader.Flag = be.Uint32(buf[:4])
-
-	reader.buf = make([]byte, 8)
-
-	if reader.IsCompact() {
-		reader.compact = true
-		reader.bufsize = int((reader.K + 3) / 4)
-	}
-	if reader.IsSorted() {
-		reader.sorted = true
-		reader.buf2 = make([]byte, 17)
-	}
-	if reader.IsIncludeTaxid() {
-		reader.includeTaxid = true
-		reader.bufTaxid = make([]byte, 4)
-	}
-
-	// number
-	_, err = io.ReadFull(r, buf[:8])
-	if err != nil {
-		return err
-	}
-	reader.Number = be.Uint64(buf[:8])
-
-	// taxid
-	_, err = io.ReadFull(r, buf[:4])
-	if err != nil {
-		return err
-	}
-	reader.globalTaxid = be.Uint32(buf[:4])
-
-	// taxid byte length
-	_, err = io.ReadFull(r, buf[1:2])
-	if err != nil {
-		return err
-	}
-	buf[0] = 0
-	reader.taxidByteLen = int(be.Uint16(buf[:2]))
-
-	// length of description
-	var lenDesc uint16
-	_, err = io.ReadFull(r, buf[:2])
-	if err != nil {
-		return err
-	}
-	lenDesc = be.Uint16(buf[:2])
-
-	desc := make([]byte, lenDesc)
-	_, err = io.ReadFull(r, desc)
-	if err != nil {
-		return err
-	}
-	reader.Description = desc
-
-	// scale
-	_, err = io.ReadFull(r, buf[:4])
-	if err != nil {
-		return err
-	}
-	reader.Scale = be.Uint32(buf[:4])
-
-	// max hash
-	_, err = io.ReadFull(r, buf[:8])
-	if err != nil {
-		return err
-	}
-	reader.MaxHash = be.Uint64(buf[:8])
-
-	reserved := make([]byte, conservedDataLen)
-	_, err = io.ReadFull(r, reserved)
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-// Read reads one KmerCode.
-func (reader *Reader) Read() (KmerCode, error) {
-	code, err := reader.ReadCode()
-	return KmerCode{Code: code, K: reader.K}, err
-}
-
-// ReadWithTaxid reads a KmerCode, also return taxid if having.
-func (reader *Reader) ReadWithTaxid() (KmerCode, uint32, error) {
-	code, taxid, err := reader.ReadCodeWithTaxid()
-	return KmerCode{Code: code, K: reader.K}, taxid, err
-}
-
-// ReadCodeWithTaxid reads a code, also return taxid if having.
-func (reader *Reader) ReadCodeWithTaxid() (code uint64, taxid uint32, err error) {
-	code, err = reader.ReadCode()
-	if err != nil {
-		return 0, 0, err
-	}
-	if reader.includeTaxid {
-		taxid, err = reader.ReadTaxid()
-		if err != nil {
-			return 0, 0, err
-		}
-	} else {
-		taxid = reader.globalTaxid
-	}
-	return code, taxid, err
-}
-
-// ReadTaxid reads on taxid
-func (reader *Reader) ReadTaxid() (taxid uint32, err error) {
-	if !reader.includeTaxid {
-		return 0, ErrCallReadWriteTaxid
-	}
-
-	if !reader.justReadACode {
-		return 0, ErrCallOrder
-	}
-
-	if reader.sorted {
-		if reader.lastRecord {
-			_, err = io.ReadFull(reader.r, reader.bufTaxid)
-			if err != nil {
-				return 0, err
-			}
-			reader.hasPrevTaxid = false
-			reader.justReadACode = false
-			return be.Uint32(reader.bufTaxid), nil
-		}
-
-		if reader.hasPrevTaxid {
-			c := reader.prevTaxid
-			reader.hasPrevTaxid = false
-			reader.justReadACode = false
-			return c, nil
-		}
-
-		_, err = io.ReadFull(reader.r, reader.bufTaxid[4-reader.taxidByteLen:])
-		if err != nil {
-			return 0, err
-		}
-		taxid = be.Uint32(reader.bufTaxid)
-
-		_, err = io.ReadFull(reader.r, reader.bufTaxid[4-reader.taxidByteLen:])
-		if err != nil {
-			return 0, err
-		}
-
-		reader.prevTaxid = be.Uint32(reader.bufTaxid)
-		reader.hasPrevTaxid = true
-		return taxid, nil
-	} else if reader.compact {
-		_, err = io.ReadFull(reader.r, reader.bufTaxid[4-reader.taxidByteLen:])
-	} else {
-		_, err = io.ReadFull(reader.r, reader.bufTaxid)
-	}
-	if err != nil {
-		return 0, err
-	}
-
-	reader.justReadACode = false
-	return be.Uint32(reader.bufTaxid), nil
-}
-
-// ReadCode reads one code.
-func (reader *Reader) ReadCode() (uint64, error) {
-	var err error
-	if reader.sorted {
-		if reader.hasPrev {
-			c := reader.prev
-			// reader.prev = 0
-			reader.hasPrev = false
-			reader.justReadACode = true
-			return c, nil
-		}
-
-		buf2 := reader.buf2
-		r := reader.r
-
-		// read control byte
-		var nReaded int
-		nReaded, err = io.ReadFull(r, buf2[0:1])
-		if err != nil {
-			return 0, err
-		}
-
-		ctrlByte := buf2[0]
-		if ctrlByte&128 > 0 { // last one
-			nReaded, err = io.ReadFull(r, buf2[0:8])
-			if err != nil {
-				return 0, err
-			}
-			reader.lastRecord = true
-			reader.justReadACode = true
-			return be.Uint64(buf2[0:8]), nil
-		}
-
-		// parse control byte
-		encodedBytes := ctrlByte2ByteLengths[ctrlByte]
-		nEncodedBytes := int(encodedBytes[0] + encodedBytes[1])
-
-		// read encoded bytes
-		nReaded, err = io.ReadFull(r, buf2[0:nEncodedBytes])
-		if err != nil {
-			return 0, err
-		}
-		if nReaded < nEncodedBytes {
-			return 0, ErrBrokenFile
-		}
-
-		decodedVals, nDecoded := Uint64s(ctrlByte, buf2[0:nEncodedBytes])
-		if nDecoded == 0 {
-			return 0, ErrBrokenFile
-		}
-
-		code := decodedVals[0] + reader.offset
-		reader.prev = code + decodedVals[1]
-		reader.hasPrev = true
-
-		reader.offset = code + decodedVals[1]
-
-		reader.justReadACode = true
-		return code, nil
-	} else if reader.compact {
-		_, err = io.ReadFull(reader.r, reader.buf[8-reader.bufsize:])
-	} else {
-		_, err = io.ReadFull(reader.r, reader.buf)
-	}
-	if err != nil {
-		return 0, err
-	}
-
-	reader.justReadACode = true
-	return be.Uint64(reader.buf), nil
-}
-
-// Writer writes KmerCode.
-type Writer struct {
-	Header
-	w           io.Writer
-	wroteHeader bool
-
-	buf []byte
-
-	// saving KmerCode in compact fixlength byte array.
-	compact bool
-	bufsize int
-
-	// sortred mode
-	sorted       bool
-	offset       uint64
-	prev         uint64 // buffered code
-	hasPrev      bool
-	buf2         []byte
-	buf3         []byte
-	ctrlByte     byte
-	nEncodedByte int
-
-	// for taxid
-	includeTaxid     bool
-	bufTaxid         []byte
-	justWrittenACode bool
-	taxidByteLen     int
-	prevTaxid        uint32 // buffered taxid
-	hasPrevTaxid     bool
-}
-
-// NewWriter creates a Writer.
-func NewWriter(w io.Writer, k int, flag uint32) (*Writer, error) {
-	if k == 0 {
-		return nil, ErrKOverflow
-	}
-
-	writer := &Writer{
-		Header: Header{MainVersion: MainVersion, MinorVersion: MinorVersion, K: k, Flag: flag, Number: 0},
-		w:      w,
-	}
-
-	// prevent wrong use of compact
-	if writer.Flag&UnikCompact > 0 && (writer.Flag&UnikSorted > 0 || writer.Flag&UnikHashed > 0) {
-		writer.Flag ^= UnikCompact
-	}
-
-	writer.buf = make([]byte, 8)
-	if writer.Flag&UnikCompact > 0 &&
-		writer.Flag&UnikSorted == 0 &&
-		writer.Flag&UnikHashed == 0 {
-
-		writer.compact = true
-		writer.bufsize = int(k+3) / 4
-	} else if writer.Flag&UnikSorted > 0 {
-		writer.sorted = true
-		writer.buf2 = make([]byte, 16)
-		writer.buf3 = make([]byte, 32)
-	}
-	if writer.Flag&UnikIncludeTaxID > 0 {
-		writer.includeTaxid = true
-		writer.bufTaxid = make([]byte, 4)
-	}
-
-	return writer, nil
-}
-
-// WriteHeader writes file header
-func (writer *Writer) WriteHeader() (err error) {
-	if writer.wroteHeader {
-		return nil
-	}
-	w := writer.w
-
-	// 8 bytes magic number
-	err = binary.Write(w, be, Magic)
-	if err != nil {
-		return err
-	}
-
-	// 4 bytes meta info
-	err = binary.Write(w, be, [4]uint8{writer.MainVersion, MinorVersion, uint8(writer.K), 0})
-	if err != nil {
-		return err
-	}
-
-	// 4 bytes flags
-	err = binary.Write(w, be, writer.Flag)
-	if err != nil {
-		return err
-	}
-
-	// 8 bytes number
-	err = binary.Write(w, be, writer.Number)
-	if err != nil {
-		return err
-	}
-
-	// 4 bytes taxid
-	err = binary.Write(w, be, writer.globalTaxid)
-	if err != nil {
-		return err
-	}
-
-	// 1 byte taxid bytes len
-	if writer.maxTaxid <= 0 {
-		writer.taxidByteLen = 4
-	} else {
-		writer.taxidByteLen = int(byteLength(uint64(writer.maxTaxid)))
-	}
-	err = binary.Write(w, be, uint8(writer.taxidByteLen))
-	if err != nil {
-		return err
-	}
-
-	// description length (2 byte)s and data (128 bytes)
-	lenDesc := len(writer.Description)
-	if lenDesc > descMaxLen {
-		return ErrDescTooLong
-	}
-	err = binary.Write(w, be, uint16(lenDesc))
-	if err != nil {
-		return err
-	}
-	err = binary.Write(w, be, writer.Description)
-	if err != nil {
-		return err
-	}
-
-	// scale
-	err = binary.Write(w, be, writer.Scale)
-	if err != nil {
-		return err
-	}
-
-	// max hash
-	err = binary.Write(w, be, writer.MaxHash)
-	if err != nil {
-		return err
-	}
-
-	// reserved 24 bytes
-	reserved := make([]byte, conservedDataLen)
-	err = binary.Write(w, be, reserved)
-	if err != nil {
-		return err
-	}
-
-	// header has 192 bytes
-
-	writer.wroteHeader = true
-	return nil
-}
-
-// SetGlobalTaxid sets the global taxid
-func (writer *Writer) SetGlobalTaxid(taxid uint32) error {
-	if writer.wroteHeader {
-		return ErrCallLate
-	}
-	writer.globalTaxid = taxid
-	return nil
-}
-
-// SetMaxTaxid set the maxtaxid
-func (writer *Writer) SetMaxTaxid(taxid uint32) error {
-	if writer.wroteHeader {
-		return ErrCallLate
-	}
-	writer.maxTaxid = taxid
-	return nil
-}
-
-// SetScale set the scale
-func (writer *Writer) SetScale(scale uint32) error {
-	if writer.wroteHeader {
-		return ErrCallLate
-	}
-	if writer.Flag&UnikHashed == 0 {
-		writer.Flag |= UnikHashed
-	}
-	if writer.Flag&UnikScaled == 0 {
-		writer.Flag |= UnikScaled
-	}
-	writer.Scale = scale
-	return nil
-}
-
-// SetMaxHash set the max hash
-func (writer *Writer) SetMaxHash(maxHash uint64) error {
-	if writer.wroteHeader {
-		return ErrCallLate
-	}
-	if writer.Flag&UnikHashed == 0 {
-		writer.Flag += UnikHashed
-	}
-	if writer.Flag&UnikScaled == 0 {
-		writer.Flag += UnikScaled
-	}
-	writer.MaxHash = maxHash
-	return nil
-}
-
-// WriteKmer writes one k-mer.
-func (writer *Writer) WriteKmer(mer []byte) error {
-	kcode, err := NewKmerCode(mer)
-	if err != nil {
-		return err
-	}
-	return writer.Write(kcode)
-}
-
-// WriteKmerWithTaxid writes one k-mer and its taxid
-func (writer *Writer) WriteKmerWithTaxid(mer []byte, taxid uint32) error {
-	err := writer.WriteKmer(mer)
-	if err != nil {
-		return nil
-	}
-	return writer.WriteTaxid(taxid)
-}
-
-// Write writes one KmerCode.
-func (writer *Writer) Write(kcode KmerCode) (err error) {
-	if writer.K != kcode.K {
-		return ErrKMismatch
-	}
-	return writer.WriteCode(kcode.Code)
-}
-
-// WriteWithTaxid writes one KmerCode and its taxid.
-// If UnikIncludeTaxID is off, taxid will not be written.
-func (writer *Writer) WriteWithTaxid(kcode KmerCode, taxid uint32) (err error) {
-	err = writer.Write(kcode)
-	if err != nil {
-		return nil
-	}
-	return writer.WriteTaxid(taxid)
-}
-
-// WriteCodeWithTaxid writes a code and its taxid.
-// If UnikIncludeTaxID is off, taxid will not be written.
-func (writer *Writer) WriteCodeWithTaxid(code uint64, taxid uint32) (err error) {
-	err = writer.WriteCode(code)
-	if err != nil {
-		return nil
-	}
-	if !writer.includeTaxid { // if no taxid, just return.
-		return nil
-	}
-	return writer.WriteTaxid(taxid)
-}
-
-// WriteTaxid appends taxid to the code
-func (writer *Writer) WriteTaxid(taxid uint32) (err error) {
-	if !writer.includeTaxid {
-		return ErrCallReadWriteTaxid
-	}
-
-	if !writer.justWrittenACode {
-		return ErrCallOrder
-	}
-
-	if writer.sorted {
-		if !writer.hasPrevTaxid { // write it later
-			writer.prevTaxid = taxid
-			writer.hasPrevTaxid = true
-			writer.justWrittenACode = false
-			return nil
-		}
-		be.PutUint32(writer.bufTaxid, writer.prevTaxid)
-		_, err = writer.w.Write(writer.bufTaxid[4-writer.taxidByteLen:])
-		// fmt.Printf("write taxid: %d, %d\n", writer.prevTaxid, writer.bufTaxid[4-writer.taxidByteLen:])
-
-		be.PutUint32(writer.bufTaxid, taxid)
-		_, err = writer.w.Write(writer.bufTaxid[4-writer.taxidByteLen:])
-		writer.hasPrevTaxid = false
-	} else if writer.compact {
-		be.PutUint32(writer.bufTaxid, taxid)
-		_, err = writer.w.Write(writer.bufTaxid[4-writer.taxidByteLen:])
-	} else {
-		be.PutUint32(writer.bufTaxid, taxid)
-		_, err = writer.w.Write(writer.bufTaxid)
-	}
-
-	writer.justWrittenACode = false
-	return nil
-}
-
-// WriteCode writes one code
-func (writer *Writer) WriteCode(code uint64) (err error) {
-	// lazily write header
-	if !writer.wroteHeader {
-		err = writer.WriteHeader()
-		if err != nil {
-			return err
-		}
-		writer.wroteHeader = true
-	}
-
-	if writer.sorted {
-		if !writer.hasPrev { // write it later
-			writer.prev = code
-			writer.hasPrev = true
-			writer.justWrittenACode = true
-			return nil
-		}
-
-		writer.ctrlByte, writer.nEncodedByte = PutUint64s(writer.buf2, writer.prev-writer.offset, code-writer.prev)
-
-		writer.buf3[0] = writer.ctrlByte
-		copy(writer.buf3[1:writer.nEncodedByte+1], writer.buf2[0:writer.nEncodedByte])
-		_, err = writer.w.Write(writer.buf3[0 : writer.nEncodedByte+1])
-
-		writer.offset = code
-		// writer.prev = 0
-		writer.hasPrev = false
-	} else if writer.compact {
-		be.PutUint64(writer.buf, code)
-		_, err = writer.w.Write(writer.buf[8-writer.bufsize:])
-	} else {
-		be.PutUint64(writer.buf, code)
-		_, err = writer.w.Write(writer.buf)
-	}
-
-	if err != nil {
-		return err
-	}
-	writer.justWrittenACode = true
-	return nil
-}
-
-// Flush write the last k-mer
-func (writer *Writer) Flush() (err error) {
-	if !writer.wroteHeader {
-		writer.Number = 0
-		writer.WriteHeader()
-	}
-	if !writer.sorted || !writer.hasPrev {
-		return nil
-	}
-
-	// write last k-mer
-	err = binary.Write(writer.w, be, uint8(128))
-	if err != nil {
-		return err
-	}
-	err = binary.Write(writer.w, be, writer.prev) // last code
-	if err != nil {
-		return err
-	}
-	if writer.includeTaxid && writer.hasPrevTaxid { // last taxid
-		err = binary.Write(writer.w, be, writer.prevTaxid)
-		if err != nil {
-			return err
-		}
-	}
-
-	writer.hasPrev = false
-	writer.hasPrevTaxid = false
-	return nil
-}
diff -pruN 0.18.8-1/serialization_test.go 0.19.0-1/serialization_test.go
--- 0.18.8-1/serialization_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/serialization_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,153 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"bufio"
-	"bytes"
-	"fmt"
-	"io"
-	"math/rand"
-	"os"
-	"sort"
-	"testing"
-
-	"github.com/shenwei356/util/byteutil"
-)
-
-func genKmers(k int, num int, sorted bool) [][]byte {
-	mers := make([][]byte, num)
-	var j int
-	for i := 0; i < num; i++ {
-		mers[i] = make([]byte, k)
-		for j = 0; j < k; j++ {
-			mers[i][j] = bit2base[rand.Intn(4)]
-		}
-	}
-	sort.Sort(byteutil.SliceOfByteSlice(mers))
-	return mers
-}
-
-// TestWriterReader tests Writer and Writer
-func TestWriter(t *testing.T) {
-	var file string
-
-	var mers, mers2 [][]byte
-	var err error
-
-	ns := []int{10001, 10001, 10001, 10000}
-	for k := 1; k <= 31; k++ {
-		for i, flag := range []uint32{0, UnikCompact, UnikSorted} { //, UnikSorted
-			func(flag uint32) {
-				mers = genKmers(k, ns[i], flag&UnikSorted > 0)
-
-				file = fmt.Sprintf("t.k%d.unik", k)
-
-				err = write(mers, file, flag)
-				if err != nil {
-					t.Error(err)
-				}
-				defer func() {
-					err = os.Remove(file)
-					if err != nil {
-						t.Error(err)
-					}
-				}()
-
-				mers2, err = read(file)
-				if err != nil {
-					t.Error(err)
-				}
-
-				if len(mers2) != len(mers) {
-					t.Errorf("write and read: number err")
-				}
-				for i := 0; i < len(mers); i++ {
-					if !bytes.Equal(mers[i], mers2[i]) {
-						t.Errorf("write and read: data mismatch. %d: %d vs %d", i, mers[i], mers2[i])
-					}
-				}
-			}(flag)
-		}
-	}
-}
-
-func write(mers [][]byte, file string, flag uint32) error {
-	w, err := os.Create(file)
-	if err != nil {
-		return err
-	}
-	defer w.Close()
-
-	outfh := bufio.NewWriter(w)
-	defer outfh.Flush()
-
-	writer, err := NewWriter(outfh, len(mers[0]), flag)
-	if err != nil {
-		return err
-	}
-
-	for _, mer := range mers {
-		err = writer.WriteKmer(mer)
-		if err != nil {
-			return err
-		}
-	}
-	err = writer.Flush()
-	if err != nil {
-		return err
-	}
-
-	return nil
-}
-
-func read(file string) ([][]byte, error) {
-	r, err := os.Open(file)
-	if err != nil {
-		return nil, err
-	}
-	defer r.Close()
-
-	infh := bufio.NewReader(r)
-
-	reader, err := NewReader(infh)
-	if err != nil {
-		return nil, err
-	}
-
-	// fmt.Println(reader.Header)
-
-	mers := make([][]byte, 0, 1000)
-	var kcode KmerCode
-	for {
-		kcode, err = reader.Read()
-		if err != nil {
-			if err == io.EOF {
-				break
-			}
-			return nil, err
-		}
-
-		mers = append(mers, kcode.Bytes())
-	}
-
-	return mers, nil
-}
diff -pruN 0.18.8-1/sketch.go 0.19.0-1/sketch.go
--- 0.18.8-1/sketch.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/sketch.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,506 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"fmt"
-	"sort"
-	"sync"
-
-	"github.com/shenwei356/bio/seq"
-	"github.com/will-rowe/nthash"
-	// hasher "github.com/zeebo/wyhash"
-)
-
-// ErrInvalidS means s >= k.
-var ErrInvalidS = fmt.Errorf("unikmer: invalid s-mer size")
-
-// ErrInvalidW means w < 2 or w > (1<<32)-1
-var ErrInvalidW = fmt.Errorf("unikmer: invalid minimimzer window")
-
-// ErrBufNil means the buffer is nil
-var ErrBufNil = fmt.Errorf("unikmer: buffer slice is nil")
-
-// ErrBufNotEmpty means the buffer has some elements
-var ErrBufNotEmpty = fmt.Errorf("unikmer: buffer has elements")
-
-// Sketch is a k-mer sketch iterator
-type Sketch struct {
-	S        []byte
-	k        int
-	s        int
-	circular bool
-	hasher   *nthash.NTHi
-
-	kMs int // k-s, just for syncmer
-	r   int // L-s
-
-	idx int // current location, 0-based
-	end int
-
-	i, mI     int
-	v, mV     uint64
-	preMinIdx int
-
-	buf     []IdxValue
-	i2v     IdxValue
-	flag    bool
-	t, b, e int
-
-	// ------ just for syncmer -------
-	hasherS           *nthash.NTHi
-	bsyncmerIdx       int
-	lateOutputThisOne bool
-	preMinIdxs        []int
-
-	// ------ just for minimizer -----
-	skip      bool
-	minimizer bool
-	w         int
-}
-
-var poolSketch = &sync.Pool{New: func() interface{} {
-	return &Sketch{}
-}}
-
-// NewMinimizerSketch returns a SyncmerSketch Iterator.
-// It returns the minHashes in all windows of w (w>=1) bp.
-func NewMinimizerSketch(S *seq.Seq, k int, w int, circular bool) (*Sketch, error) {
-	if k < 1 {
-		return nil, ErrInvalidK
-	}
-	if w < 1 || w > (1<<31)-1 {
-		return nil, ErrInvalidW
-	}
-	if len(S.Seq) < k+w-1 {
-		return nil, ErrShortSeq
-	}
-
-	// sketch := &Sketch{S: S.Seq, w: w, k: k, circular: circular}
-	sketch := poolSketch.Get().(*Sketch)
-
-	sketch.minimizer = true
-	sketch.k = k
-	sketch.w = w
-	sketch.circular = circular
-	sketch.skip = w == 1
-
-	var seq2 []byte
-	if circular {
-		seq2 = make([]byte, len(S.Seq), len(S.Seq)+k-1)
-		copy(seq2, S.Seq) // do not edit original sequence
-		seq2 = append(seq2, S.Seq[0:k-1]...)
-		sketch.S = seq2
-	} else {
-		seq2 = S.Seq
-	}
-
-	sketch.idx = 0
-	sketch.end = len(seq2) - 1
-	sketch.r = w - 1 // L-k
-
-	var err error
-	sketch.hasher, err = nthash.NewHasher(&seq2, uint(k))
-	if err != nil {
-		return nil, err
-	}
-
-	if sketch.buf == nil {
-		sketch.buf = make([]IdxValue, 0, w)
-	} else {
-		sketch.buf = sketch.buf[:0]
-	}
-	if sketch.preMinIdxs == nil {
-		sketch.preMinIdxs = make([]int, 0, 8)
-	} else {
-		sketch.preMinIdxs = sketch.preMinIdxs[:0]
-	}
-	sketch.preMinIdx = -1
-
-	return sketch, nil
-}
-
-// NewSyncmerSketch returns a SyncmerSketch Iterator.
-// 1<=s<=k.
-func NewSyncmerSketch(S *seq.Seq, k int, s int, circular bool) (*Sketch, error) {
-	if k < 1 {
-		return nil, ErrInvalidK
-	}
-	if s > k || s == 0 {
-		return nil, ErrInvalidS
-	}
-	if len(S.Seq) < k*2-s-1 {
-		return nil, ErrShortSeq
-	}
-
-	// sketch := &Sketch{S: S.Seq, s: s, k: k, circular: circular}
-	sketch := poolSketch.Get().(*Sketch)
-
-	sketch.minimizer = false
-	sketch.k = k
-	sketch.s = s
-	sketch.circular = circular
-	sketch.skip = s == k
-
-	var seq2 []byte
-	if circular {
-		seq2 = make([]byte, len(S.Seq), len(S.Seq)+k-1)
-		copy(seq2, S.Seq) // do not edit original sequence
-		seq2 = append(seq2, S.Seq[0:k-1]...)
-		sketch.S = seq2
-	} else {
-		seq2 = S.Seq
-	}
-
-	sketch.idx = 0
-	sketch.end = len(seq2) - 2*k + s + 1 // len(sequence) - L (2*k - s - 1)
-	sketch.r = 2*k - s - 1 - s           // L-s
-	sketch.kMs = k - s                   // k-s
-	sketch.w = k - s
-
-	var err error
-	sketch.hasher, err = nthash.NewHasher(&seq2, uint(k))
-	if err != nil {
-		return nil, err
-	}
-
-	sketch.hasherS, err = nthash.NewHasher(&seq2, uint(s))
-	if err != nil {
-		return nil, err
-	}
-
-	if sketch.buf == nil {
-		sketch.buf = make([]IdxValue, 0, (k-s)<<1)
-	} else {
-		sketch.buf = sketch.buf[:0]
-	}
-	if sketch.preMinIdxs == nil {
-		sketch.preMinIdxs = make([]int, 0, 8)
-	} else {
-		sketch.preMinIdxs = sketch.preMinIdxs[:0]
-	}
-	sketch.preMinIdx = -1
-
-	return sketch, nil
-}
-
-// NextMinimizer returns next minimizer.
-func (s *Sketch) NextMinimizer() (code uint64, ok bool) {
-	for {
-		if s.idx > s.end {
-			return 0, false
-		}
-
-		// nthash of current k-mer
-		code, ok = s.hasher.Next(true)
-		if !ok {
-			poolSketch.Put(s)
-			return code, false
-		}
-
-		if s.skip {
-			s.mI = s.idx
-			s.idx++
-			return code, true
-		}
-
-		// in window
-		if s.idx < s.r {
-			s.buf = append(s.buf, IdxValue{Idx: s.idx, Val: code})
-
-			s.idx++
-			continue
-		}
-
-		// end of w
-		if s.idx == s.r {
-			s.buf = append(s.buf, IdxValue{Idx: s.idx, Val: code})
-			sort.Sort(idxValues(s.buf)) // sort
-
-			s.i2v = s.buf[0]
-
-			s.mI, s.mV = s.i2v.Idx, s.i2v.Val
-			s.preMinIdx = s.mI
-
-			s.idx++
-			return s.i2v.Val, true
-		}
-
-		// find min k-mer
-		// remove k-mer not in this window.
-		// have to check position/index one by one
-		for s.i, s.i2v = range s.buf {
-			if s.i2v.Idx == s.idx-s.w {
-				if s.i < s.r {
-					copy(s.buf[s.i:s.r], s.buf[s.i+1:])
-				} // happen to be at the end
-				s.buf = s.buf[:s.r]
-				break
-			}
-		}
-
-		// add new k-mer
-		s.flag = false
-		// using binary search, faster han linear search
-		s.b, s.e = 0, s.r-1
-		for {
-			s.t = s.b + (s.e-s.b)/2
-			if code < s.buf[s.t].Val {
-				s.e = s.t - 1 // end search here
-				if s.e <= s.b {
-					s.flag = true
-					s.i = s.b
-					break
-				}
-			} else {
-				s.b = s.t + 1 // start here
-				if s.b >= s.r {
-					s.flag = false
-					break
-				}
-				if s.b >= s.e {
-					s.flag = true
-					s.i = s.e // right here
-					break
-				}
-			}
-		}
-		if !s.flag { // it's the biggest one, append to the end
-			s.buf = append(s.buf, IdxValue{s.idx, code})
-		} else {
-			if code >= s.buf[s.i].Val { // have to check again
-				s.i++
-			}
-			s.buf = append(s.buf, blankI2V)     // append one element
-			copy(s.buf[s.i+1:], s.buf[s.i:s.r]) // move right
-			s.buf[s.i] = IdxValue{s.idx, code}
-		}
-
-		s.i2v = s.buf[0]
-		if s.i2v.Idx == s.preMinIdx { // deduplicate
-			s.idx++
-			continue
-		}
-
-		s.mI, s.mV = s.i2v.Idx, s.i2v.Val
-		s.preMinIdx = s.mI
-
-		s.idx++
-		return s.i2v.Val, true
-	}
-}
-
-// NextSyncmer returns next syncmer.
-func (s *Sketch) NextSyncmer() (code uint64, ok bool) {
-	for {
-		if s.idx > s.end {
-			return 0, false
-		}
-
-		// nthash of current k-mer
-		code, ok = s.hasher.Next(true)
-		if !ok {
-			poolSketch.Put(s)
-			return code, false
-		}
-
-		// fmt.Printf("\nidx: %d, %s, %d\n", s.idx, s.S[s.idx:s.idx+s.s], code)
-		// fmt.Printf("idx: %d, pres: %v, pre: %d\n", s.idx, s.preMinIdxs, s.preMinIdx)
-
-		if s.skip {
-			s.idx++
-			return code, true
-		}
-
-		if len(s.preMinIdxs) > 0 && s.idx == s.preMinIdxs[0] {
-			// we will output this one in this round
-			s.lateOutputThisOne = true
-		} else {
-			s.lateOutputThisOne = false
-		}
-
-		// find min s-mer
-		if s.idx == 0 {
-			for s.i = s.idx; s.i <= s.idx+s.r; s.i++ {
-				// fmt.Printf("s: %d\n", s.i)
-				s.v, ok = s.hasherS.Next(true)
-				if !ok {
-					return code, false
-				}
-				s.buf = append(s.buf, IdxValue{Idx: s.i, Val: s.v})
-			}
-			sort.Sort(idxValues(s.buf))
-		} else {
-			// remove s-mer not in this window.
-			// have to check position/index one by one
-			for s.i, s.i2v = range s.buf {
-				if s.i2v.Idx == s.idx-1 {
-					if s.i < s.r {
-						copy(s.buf[s.i:s.r], s.buf[s.i+1:])
-					} // happen to be at the end
-					s.buf = s.buf[:s.r]
-					break
-				}
-			}
-
-			// add new s-mer
-			// fmt.Printf("s: %d\n", s.idx+s.r)
-			s.v, ok = s.hasherS.Next(true)
-			if !ok {
-				return code, false
-			}
-			s.flag = false
-			// using binary search, faster han linear search
-			s.b, s.e = 0, s.r-1
-			for {
-				s.t = s.b + (s.e-s.b)/2
-				if s.v < s.buf[s.t].Val {
-					s.e = s.t - 1 // end search here
-					if s.e <= s.b {
-						s.flag = true
-						s.i = s.b
-						break
-					}
-				} else {
-					s.b = s.t + 1 // start here
-					if s.b >= s.r {
-						s.flag = false
-						break
-					}
-					if s.b >= s.e {
-						s.flag = true
-						s.i = s.e // right here
-						break
-					}
-				}
-			}
-			if !s.flag { // it's the biggest one, append to the end
-				s.buf = append(s.buf, IdxValue{s.idx + s.r, s.v})
-			} else {
-				if s.v >= s.buf[s.i].Val { // have to check again
-					s.i++
-				}
-				s.buf = append(s.buf, blankI2V)     // append one element
-				copy(s.buf[s.i+1:], s.buf[s.i:s.r]) // move right
-				s.buf[s.i] = IdxValue{s.idx + s.r, s.v}
-			}
-		}
-
-		s.i2v = s.buf[0]
-		s.mI, s.mV = s.i2v.Idx, s.i2v.Val
-
-		// fmt.Printf("  smer: %d: %d\n", s.mI, s.mV)
-
-		// find the location of bounded syncmer
-		if s.mI-s.idx < s.w { // syncmer at the beginning of kmer
-			s.bsyncmerIdx = s.mI
-			// fmt.Printf("  bIdx: start: %d\n", s.bsyncmerIdx)
-		} else { // at the end
-			s.bsyncmerIdx = s.mI - s.kMs
-			// fmt.Printf("  bIdx:   end: %d\n", s.bsyncmerIdx)
-		}
-
-		// ----------------------------------
-
-		// duplicated
-		if len(s.preMinIdxs) > 0 && s.bsyncmerIdx == s.preMinIdxs[0] {
-			// fmt.Printf("  duplicated:  %d\n", s.bsyncmerIdx)
-			if s.lateOutputThisOne {
-				// remove the first element
-				copy(s.preMinIdxs[0:len(s.preMinIdxs)-1], s.preMinIdxs[1:])
-				s.preMinIdxs = s.preMinIdxs[0 : len(s.preMinIdxs)-1]
-
-				s.idx++
-				s.preMinIdx = s.bsyncmerIdx
-				return code, true
-			}
-
-			s.idx++
-			// s.preMinIdx = s.bsyncmerIdx
-			continue
-		}
-
-		if s.lateOutputThisOne {
-			// remove the first element
-			copy(s.preMinIdxs[0:len(s.preMinIdxs)-1], s.preMinIdxs[1:])
-			s.preMinIdxs = s.preMinIdxs[0 : len(s.preMinIdxs)-1]
-
-			if s.preMinIdx != s.bsyncmerIdx {
-				s.preMinIdxs = append(s.preMinIdxs, s.bsyncmerIdx)
-			}
-			// fmt.Printf("    late2: %d\n", s.preMinIdxs[0])
-
-			s.idx++
-			s.preMinIdx = s.bsyncmerIdx
-			return code, true
-		}
-
-		// is it current kmer?
-		if s.bsyncmerIdx == s.idx {
-			// fmt.Printf("  current: %d\n", s.bsyncmerIdx)
-			if len(s.preMinIdxs) > 0 {
-				// remove the first element
-				copy(s.preMinIdxs[0:len(s.preMinIdxs)-1], s.preMinIdxs[1:])
-				s.preMinIdxs = s.preMinIdxs[0 : len(s.preMinIdxs)-1]
-			}
-			s.idx++
-			s.preMinIdx = s.bsyncmerIdx
-			return code, true
-		}
-
-		if s.preMinIdx != s.bsyncmerIdx {
-			s.preMinIdxs = append(s.preMinIdxs, s.bsyncmerIdx)
-		}
-		// fmt.Printf("  return it later: %d\n", s.bsyncmerIdx)
-		s.idx++
-		s.preMinIdx = s.bsyncmerIdx
-	}
-}
-
-// Next returns next sketch
-func (s *Sketch) Next() (uint64, bool) {
-	if s.minimizer {
-		return s.NextMinimizer()
-	}
-	return s.NextSyncmer()
-}
-
-// Index returns current  0-baesd index
-func (s *Sketch) Index() int {
-	if s.minimizer {
-		return s.mI
-	}
-	return s.idx - 1
-}
-
-// IdxValue is for storing k-mer hash and it's location when computing k-mer sketches.
-type IdxValue struct {
-	Idx int    // index
-	Val uint64 // hash
-}
-
-var blankI2V = IdxValue{0, 0}
-
-type idxValues []IdxValue
-
-func (l idxValues) Len() int               { return len(l) }
-func (l idxValues) Less(i int, j int) bool { return l[i].Val < l[j].Val }
-func (l idxValues) Swap(i int, j int)      { l[i], l[j] = l[j], l[i] }
diff -pruN 0.18.8-1/sketch-protein.go 0.19.0-1/sketch-protein.go
--- 0.18.8-1/sketch-protein.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/sketch-protein.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,214 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"sort"
-	"sync"
-
-	"github.com/shenwei356/bio/seq"
-	"github.com/zeebo/wyhash"
-)
-
-// ProteinMinimizerSketch is a protein k-mer minimizer iterator
-type ProteinMinimizerSketch struct {
-	s *seq.Seq // amino acid
-
-	k    int
-	end0 int
-	idx  int
-
-	// ----------------------
-
-	skip bool
-	w    int
-
-	end int
-	r   int // L-s
-
-	i, mI     int
-	mV        uint64
-	preMinIdx int
-
-	buf     []IdxValue
-	i2v     IdxValue
-	flag    bool
-	t, b, e int
-}
-
-var poolProteinMinimizerSketch = &sync.Pool{New: func() interface{} {
-	return &ProteinMinimizerSketch{}
-}}
-
-// NewProteinMinimizerSketch returns a ProteinMinimizerSketch
-func NewProteinMinimizerSketch(S *seq.Seq, k int, codonTable int, frame int, w int) (*ProteinMinimizerSketch, error) {
-	if k < 1 {
-		return nil, ErrInvalidK
-	}
-	if len(S.Seq) < k*3 {
-		return nil, ErrShortSeq
-	}
-
-	if w < 1 || w > (1<<31)-1 {
-		return nil, ErrInvalidW
-	}
-	if len(S.Seq) < k*3+w-1 {
-		return nil, ErrShortSeq
-	}
-
-	// s := &ProteinMinimizerSketch{s0: S, k: k, w: w}
-	s := poolProteinMinimizerSketch.Get().(*ProteinMinimizerSketch)
-	s.k = k
-	s.w = w
-
-	var err error
-	if S.Alphabet != seq.Protein {
-		s.s, err = S.Translate(codonTable, frame, false, false, true, false)
-		if err != nil {
-			return nil, err
-		}
-	} else {
-		s.s = S
-	}
-
-	s.idx = 0
-	s.end0 = len(s.s.Seq) - k
-
-	s.skip = w == 1
-	s.end = len(s.s.Seq) - 1
-	s.r = w - 1 // L-k
-
-	s.buf = make([]IdxValue, 0, w)
-	s.preMinIdx = -1
-
-	return s, nil
-}
-
-// Next returns next hash value
-func (s *ProteinMinimizerSketch) Next() (code uint64, ok bool) {
-	for {
-		// if s.idx > s.end {
-		// 	return 0, false
-		// }
-
-		if s.idx > s.end0 {
-			poolProteinIterator.Put(s)
-			return 0, false
-		}
-
-		code = wyhash.Hash(s.s.Seq[s.idx:s.idx+s.k], 1)
-
-		if s.skip {
-			s.mI = s.idx
-			s.idx++
-			return code, true
-		}
-
-		// in window
-		if s.idx < s.r {
-			s.buf = append(s.buf, IdxValue{Idx: s.idx, Val: code})
-
-			s.idx++
-			continue
-		}
-
-		// end of w
-		if s.idx == s.r {
-			s.buf = append(s.buf, IdxValue{Idx: s.idx, Val: code})
-			sort.Sort(idxValues(s.buf)) // sort
-
-			s.i2v = s.buf[0]
-
-			s.mI, s.mV = s.i2v.Idx, s.i2v.Val
-			s.preMinIdx = s.mI
-
-			s.idx++
-			return s.i2v.Val, true
-		}
-
-		// find min k-mer
-		// remove k-mer not in this window.
-		// have to check position/index one by one
-		for s.i, s.i2v = range s.buf {
-			if s.i2v.Idx == s.idx-s.w {
-				if s.i < s.r {
-					copy(s.buf[s.i:s.r], s.buf[s.i+1:])
-				} // happen to be at the end
-				s.buf = s.buf[:s.r]
-				break
-			}
-		}
-
-		// add new k-mer
-		s.flag = false
-		// using binary search, faster han linear search
-		s.b, s.e = 0, s.r-1
-		for {
-			s.t = s.b + (s.e-s.b)/2
-			if code < s.buf[s.t].Val {
-				s.e = s.t - 1 // end search here
-				if s.e <= s.b {
-					s.flag = true
-					s.i = s.b
-					break
-				}
-			} else {
-				s.b = s.t + 1 // start here
-				if s.b >= s.r {
-					s.flag = false
-					break
-				}
-				if s.b >= s.e {
-					s.flag = true
-					s.i = s.e // right here
-					break
-				}
-			}
-		}
-		if !s.flag { // it's the biggest one, append to the end
-			s.buf = append(s.buf, IdxValue{s.idx, code})
-		} else {
-			if code >= s.buf[s.i].Val { // have to check again
-				s.i++
-			}
-			s.buf = append(s.buf, blankI2V)     // append one element
-			copy(s.buf[s.i+1:], s.buf[s.i:s.r]) // move right
-			s.buf[s.i] = IdxValue{s.idx, code}
-		}
-
-		s.i2v = s.buf[0]
-		if s.i2v.Idx == s.preMinIdx { // deduplicate
-			s.idx++
-			continue
-		}
-
-		s.mI, s.mV = s.i2v.Idx, s.i2v.Val
-		s.preMinIdx = s.mI
-
-		s.idx++
-		return s.i2v.Val, true
-	}
-}
-
-// Index returns current 0-baesd index.
-func (s *ProteinMinimizerSketch) Index() int {
-	return s.mI
-}
diff -pruN 0.18.8-1/sketch-protein_test.go 0.19.0-1/sketch-protein_test.go
--- 0.18.8-1/sketch-protein_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/sketch-protein_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,58 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"testing"
-
-	"github.com/shenwei356/bio/seq"
-)
-
-func TestProteinMinimizer(t *testing.T) {
-	_s := "AAGTTTGAATCATTCAACTATCTAGTTTTCAGAGAACAATGTTCTCTAAAGAATAGAAAAGAGTCATTGTGCGGTGATGATGGCGGGAAGGATCCACCTG"
-	sequence, err := seq.NewSeq(seq.DNA, []byte(_s))
-	if err != nil {
-		t.Errorf("fail to create sequence: %s", _s)
-	}
-	k := 10
-	w := 3
-
-	sketch, err := NewProteinMinimizerSketch(sequence, k, 1, 1, w)
-	if err != nil {
-		t.Errorf("fail to create minizimer sketch")
-	}
-
-	var code uint64
-	var ok bool
-	// var idx int
-	codes := make([]uint64, 0, 1024)
-	for {
-		code, ok = sketch.Next()
-		if !ok {
-			break
-		}
-
-		// idx = sketch.Index()
-		// fmt.Printf("aa: %d-%s, %d\n", idx, sketch.s.Seq[idx:idx+k], code)
-
-		codes = append(codes, code)
-	}
-}
diff -pruN 0.18.8-1/sketch_test.go 0.19.0-1/sketch_test.go
--- 0.18.8-1/sketch_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/sketch_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,218 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"testing"
-
-	"github.com/shenwei356/bio/seq"
-	"github.com/shenwei356/util/bytesize"
-)
-
-var _syncmer uint64
-var _syncmerIdx int
-
-func TestMinimizer(t *testing.T) {
-	_s := "GGCAAGTTCGTCA"
-	// _s := "GGCAAGTTC"
-	sequence, err := seq.NewSeq(seq.DNA, []byte(_s))
-	if err != nil {
-		t.Errorf("fail to create sequence: %s", _s)
-	}
-	k := 5
-	w := 3
-
-	sketch, err := NewMinimizerSketch(sequence, k, w, false)
-	if err != nil {
-		t.Errorf("fail to create minizimer sketch")
-	}
-
-	var code uint64
-	var ok bool
-	var idx int
-	codes := make([]uint64, 0, 1024)
-	for {
-		code, ok = sketch.NextMinimizer()
-		if !ok {
-			break
-		}
-
-		idx = sketch.Index()
-
-		_syncmerIdx = idx
-		_syncmer = code
-
-		codes = append(codes, code)
-		// fmt.Printf("minizimer: %d-%s, %d\n", idx, _s[idx:idx+k], code)
-	}
-
-	if len(codes) == 5 &&
-		codes[0] == 973456138564179607 &&
-		codes[1] == 2645801399420473919 &&
-		codes[2] == 1099502864234245338 &&
-		codes[3] == 6763474888237448943 &&
-		codes[4] == 2737971715116251183 {
-	} else {
-		t.Errorf("minizimer error")
-	}
-}
-
-func TestSyncmer(t *testing.T) {
-	_s := "GGCAAGTTCGTCATCGATC"
-	// _s := "GGCAAGTTC"
-	sequence, err := seq.NewSeq(seq.DNA, []byte(_s))
-	if err != nil {
-		t.Errorf("fail to create sequence: %s", _s)
-	}
-	k := 5
-	s := 2
-
-	sketch, err := NewSyncmerSketch(sequence, k, s, false)
-	if err != nil {
-		t.Errorf("fail to create syncmer sketch")
-	}
-	var code uint64
-	var ok bool
-	var idx int
-	codes := make([]uint64, 0, 1024)
-	for {
-		code, ok = sketch.NextSyncmer()
-		// fmt.Println(sketch.Index(), code, ok)
-		if !ok {
-			break
-		}
-
-		idx = sketch.Index()
-
-		_syncmerIdx = idx
-		_syncmer = code
-
-		codes = append(codes, code)
-		// fmt.Printf("syncmer: %d-%s, %d\n", idx, _s[idx:idx+k], code)
-	}
-	// if len(codes) == 5 &&
-	// 	codes[0] == 7385093395039290540 &&
-	// 	codes[1] == 1099502864234245338 {
-	// } else {
-	// 	t.Errorf("syncmer error")
-	// }
-}
-
-func BenchmarkMinimizerSketch(b *testing.B) {
-	for i := range benchSeqs {
-		size := len(benchSeqs[i].Seq)
-		b.Run(bytesize.ByteSize(size).String(), func(b *testing.B) {
-			var code uint64
-			var ok bool
-			// var n int
-
-			for j := 0; j < b.N; j++ {
-				iter, err := NewMinimizerSketch(benchSeqs[i], 31, 15, false)
-				if err != nil {
-					b.Errorf("fail to create minizimer sketch. seq length: %d", size)
-				}
-
-				// n = 0
-				for {
-					code, ok = iter.NextMinimizer()
-					if !ok {
-						break
-					}
-
-					// fmt.Printf("minizimer: %d-%d\n", iter.Index(), code)
-
-					_code = code
-					// n++
-				}
-
-			}
-			// fmt.Printf("minizimer for %s DNA, c=%.6f\n", bytesize.ByteSize(size).String(), float64(size)/float64(n))
-		})
-	}
-}
-
-// go test -v -test.bench=BenchmarkSyncmerSketch -cpuprofile profile.out -test.run=damnit
-// go tool pprof -http=:8080 profile.out
-func BenchmarkSyncmerSketch(b *testing.B) {
-	for i := range benchSeqs {
-		size := len(benchSeqs[i].Seq)
-		b.Run(bytesize.ByteSize(size).String(), func(b *testing.B) {
-			var code uint64
-			var ok bool
-			// var n int
-
-			for j := 0; j < b.N; j++ {
-				iter, err := NewSyncmerSketch(benchSeqs[i], 31, 16, false)
-				if err != nil {
-					b.Errorf("fail to create syncmer sketch. seq length: %d", size)
-				}
-
-				// n = 0
-				for {
-					code, ok = iter.NextSyncmer()
-					if !ok {
-						break
-					}
-
-					// fmt.Printf("syncmer: %d-%d\n", iter.Index(), code)
-
-					_code = code
-					// n++
-				}
-
-			}
-			// fmt.Printf("syncmer for %s DNA, c=%.6f\n", bytesize.ByteSize(size).String(), float64(size)/float64(n))
-		})
-	}
-}
-
-func BenchmarkProteinMinimizerSketch(b *testing.B) {
-	for i := range benchSeqs {
-		size := len(benchSeqs[i].Seq)
-		b.Run(bytesize.ByteSize(size).String(), func(b *testing.B) {
-			var code uint64
-			var ok bool
-			// var n int
-
-			for j := 0; j < b.N; j++ {
-				iter, err := NewProteinMinimizerSketch(benchSeqs[i], 10, 1, 1, 5)
-				if err != nil {
-					b.Errorf("fail to create minizimer sketch. seq length: %d", size)
-				}
-
-				// n = 0
-				for {
-					code, ok = iter.Next()
-					if !ok {
-						break
-					}
-
-					// fmt.Printf("minizimer: %d-%d\n", iter.Index(), code)
-
-					_code = code
-					// n++
-				}
-
-			}
-			// fmt.Printf("minizimer for %s Protein, c=%.6f\n", bytesize.ByteSize(size).String(), float64(size)/float64(n))
-		})
-	}
-}
diff -pruN 0.18.8-1/taxonomy.go 0.19.0-1/taxonomy.go
--- 0.18.8-1/taxonomy.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/taxonomy.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,692 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"bufio"
-	"errors"
-	"fmt"
-	"strconv"
-	"strings"
-	"sync"
-
-	"github.com/shenwei356/xopen"
-)
-
-// Taxonomy holds relationship of taxon in a taxonomy.
-type Taxonomy struct {
-	file     string
-	rootNode uint32
-
-	Nodes      map[uint32]uint32 // child -> parent
-	DelNodes   map[uint32]struct{}
-	MergeNodes map[uint32]uint32 // from -> to
-	Names      map[uint32]string
-
-	taxid2rankid map[uint32]uint8 // taxid -> rank id
-	ranks        []string         // rank id -> rank
-	Ranks        map[string]interface{}
-
-	hasRanks      bool
-	hasDelNodes   bool
-	hasMergeNodes bool
-	hasNames      bool
-
-	cacheLCA bool
-	lcaCache sync.Map
-
-	maxTaxid uint32
-}
-
-// ErrIllegalColumnIndex means column index is 0 or negative.
-var ErrIllegalColumnIndex = errors.New("unikmer: illegal column index, positive integer needed")
-
-// ErrRankNotLoaded means you should reate load Taxonomy with NewTaxonomyWithRank before calling some methods.
-var ErrRankNotLoaded = errors.New("unikmer: taxonomic ranks not loaded, please call: NewTaxonomyWithRank")
-
-// ErrNamesNotLoaded means you should call LoadNames before using taxonomy names.
-var ErrNamesNotLoaded = errors.New("unikmer: taxonomy names not loaded, please call: LoadNames")
-
-// ErrTooManyRanks means number of ranks exceed limit of 255
-var ErrTooManyRanks = errors.New("unikmer: number of ranks exceed limit of 255")
-
-// ErrUnkownRank indicate an unknown rank
-var ErrUnkownRank = errors.New("unikmer: unknown rank")
-
-// NewTaxonomyFromNCBI parses nodes relationship from nodes.dmp
-// from ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz .
-func NewTaxonomyFromNCBI(file string) (*Taxonomy, error) {
-	return NewTaxonomy(file, 1, 3)
-}
-
-// NewTaxonomy only loads nodes from nodes.dmp file.
-func NewTaxonomy(file string, childColumn int, parentColumn int) (*Taxonomy, error) {
-	if childColumn < 1 || parentColumn < 1 {
-		return nil, ErrIllegalColumnIndex
-	}
-
-	maxColumns := maxInt(childColumn, parentColumn)
-
-	fh, err := xopen.Ropen(file)
-	if err != nil {
-		return nil, fmt.Errorf("unikmer: %s", err)
-	}
-	defer func() {
-		fh.Close()
-	}()
-
-	nodes := make(map[uint32]uint32, 1024)
-
-	n := maxColumns + 1
-
-	childColumn--
-	parentColumn--
-
-	items := make([]string, n)
-	scanner := bufio.NewScanner(fh)
-	var _child, _parent int
-	var child, parent uint32
-	var maxTaxid uint32
-	var root uint32
-	for scanner.Scan() {
-		stringSplitN(scanner.Text(), "\t", n, &items)
-		if len(items) < n {
-			continue
-		}
-
-		_child, err = strconv.Atoi(items[childColumn])
-		if err != nil {
-			continue
-		}
-
-		_parent, err = strconv.Atoi(items[parentColumn])
-		if err != nil {
-			continue
-		}
-
-		child, parent = uint32(_child), uint32(_parent)
-
-		// ----------------------------------
-
-		nodes[child] = parent
-
-		if child == parent {
-			root = child
-		}
-		if child > maxTaxid {
-			maxTaxid = child
-		}
-
-	}
-	if err := scanner.Err(); err != nil {
-		return nil, fmt.Errorf("unikmer: %s", err)
-	}
-
-	return &Taxonomy{file: file, Nodes: nodes, rootNode: root, maxTaxid: maxTaxid}, nil
-}
-
-// NewTaxonomyWithRankFromNCBI parses Taxonomy from nodes.dmp
-// from ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz .
-func NewTaxonomyWithRankFromNCBI(file string) (*Taxonomy, error) {
-	return NewTaxonomyWithRank(file, 1, 3, 5)
-}
-
-// NewTaxonomyWithRank loads nodes and ranks from nodes.dmp file.
-func NewTaxonomyWithRank(file string, childColumn int, parentColumn int, rankColumn int) (*Taxonomy, error) {
-	if childColumn < 1 || parentColumn < 1 || rankColumn < 1 {
-		return nil, ErrIllegalColumnIndex
-	}
-
-	maxColumns := maxInt(childColumn, parentColumn, rankColumn)
-
-	taxid2rankid := make(map[uint32]uint8, 1024)
-	ranks := make([]string, 0, 128)
-	rank2rankid := make(map[string]int, 128)
-	ranksMap := make(map[string]interface{}, 128)
-
-	fh, err := xopen.Ropen(file)
-	if err != nil {
-		return nil, fmt.Errorf("unikmer: %s", err)
-	}
-	defer func() {
-		fh.Close()
-	}()
-
-	nodes := make(map[uint32]uint32, 1024)
-
-	n := maxColumns + 1
-
-	childColumn--
-	parentColumn--
-	rankColumn--
-
-	items := make([]string, n)
-	scanner := bufio.NewScanner(fh)
-	var _child, _parent int
-	var child, parent uint32
-	var maxTaxid uint32
-	var rank string
-	var ok bool
-	var rankid int
-	var root uint32
-	for scanner.Scan() {
-		stringSplitN(scanner.Text(), "\t", n, &items)
-		if len(items) < n {
-			continue
-		}
-
-		_child, err = strconv.Atoi(items[childColumn])
-		if err != nil {
-			continue
-		}
-
-		_parent, err = strconv.Atoi(items[parentColumn])
-		if err != nil {
-			continue
-		}
-
-		child, parent, rank = uint32(_child), uint32(_parent), items[rankColumn]
-
-		// ----------------------------------
-
-		nodes[child] = parent
-
-		if child == parent {
-			root = child
-		}
-		if child > maxTaxid {
-			maxTaxid = child
-		}
-
-		if rankid, ok = rank2rankid[rank]; ok {
-			taxid2rankid[child] = uint8(rankid)
-		} else {
-			ranks = append(ranks, rank)
-			if len(ranks) > 255 {
-				return nil, ErrTooManyRanks
-			}
-			rank2rankid[rank] = len(ranks) - 1
-			taxid2rankid[child] = uint8(len(ranks) - 1)
-			ranksMap[rank] = struct{}{}
-		}
-
-	}
-	if err := scanner.Err(); err != nil {
-		return nil, fmt.Errorf("unikmer: %s", err)
-	}
-
-	return &Taxonomy{file: file, Nodes: nodes, rootNode: root, maxTaxid: maxTaxid,
-		taxid2rankid: taxid2rankid, ranks: ranks, hasRanks: true, Ranks: ranksMap}, nil
-}
-
-// Rank returns rank of a taxid.
-func (t *Taxonomy) Rank(taxid uint32) string {
-	if !t.hasRanks {
-		panic(ErrRankNotLoaded)
-	}
-	if i, ok := t.taxid2rankid[taxid]; ok {
-		return t.ranks[int(i)]
-	}
-	return "" // taxid not found int db
-}
-
-// AtOrBelowRank returns whether a taxid is at or below one rank.
-func (t *Taxonomy) AtOrBelowRank(taxid uint32, rank string) bool {
-	if !t.hasRanks {
-		panic(ErrRankNotLoaded)
-	}
-	var ok bool
-	var i uint8
-
-	rank = strings.ToLower(rank)
-	if _, ok = t.Ranks[rank]; !ok {
-		return false
-	}
-
-	if i, ok = t.taxid2rankid[taxid]; ok {
-		if rank == t.ranks[int(i)] {
-			return true
-		}
-	}
-
-	// continue searching towards to root node
-	var child, parent, newtaxid uint32
-
-	child = taxid
-	for {
-		parent, ok = t.Nodes[child]
-		if !ok { // taxid not found
-			// check if it was deleted
-			if _, ok = t.DelNodes[child]; ok {
-				return false
-			}
-			// check if it was merged
-			if newtaxid, ok = t.MergeNodes[child]; ok {
-				child = newtaxid
-				parent = t.Nodes[child]
-			} else { // not found
-				return false
-			}
-		}
-
-		if parent == 1 {
-			break
-		}
-
-		if rank == t.ranks[t.taxid2rankid[parent]] {
-			return true
-		}
-
-		child = parent
-	}
-
-	return false
-}
-
-// LoadNamesFromNCBI loads scientific names from NCBI names.dmp
-func (t *Taxonomy) LoadNamesFromNCBI(file string) error {
-	return t.LoadNames(file, 1, 3, 7, "scientific name")
-}
-
-// LoadNames loads names.
-func (t *Taxonomy) LoadNames(file string, taxidColumn int, nameColumn int, typeColumn int, _type string) error {
-	if taxidColumn < 1 || nameColumn < 1 || typeColumn < 1 {
-		return ErrIllegalColumnIndex
-	}
-
-	maxColumns := maxInt(nameColumn, nameColumn, typeColumn)
-
-	fh, err := xopen.Ropen(file)
-	if err != nil {
-		return fmt.Errorf("unikmer: %s", err)
-	}
-	defer func() {
-		fh.Close()
-	}()
-
-	m := make(map[uint32]string, 1024)
-
-	n := maxColumns + 1
-
-	taxidColumn--
-	nameColumn--
-	typeColumn--
-
-	filterByType := _type != ""
-
-	items := make([]string, n)
-	scanner := bufio.NewScanner(fh)
-	var taxid uint64
-	for scanner.Scan() {
-		stringSplitN(scanner.Text(), "\t", n, &items)
-		if len(items) < n {
-			continue
-		}
-
-		if filterByType && items[typeColumn] != _type {
-			continue
-		}
-
-		taxid, err = strconv.ParseUint(items[taxidColumn], 10, 32)
-		if err != nil {
-			continue
-		}
-
-		m[uint32(taxid)] = items[nameColumn]
-	}
-	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("unikmer: %s", err)
-	}
-
-	t.Names = m
-	t.hasNames = true
-	return nil
-}
-
-// LoadMergedNodesFromNCBI loads merged nodes from  NCBI merged.dmp.
-func (t *Taxonomy) LoadMergedNodesFromNCBI(file string) error {
-	return t.LoadMergedNodes(file, 1, 3)
-}
-
-// LoadMergedNodes loads merged nodes.
-func (t *Taxonomy) LoadMergedNodes(file string, oldColumn int, newColumn int) error {
-	if oldColumn < 1 || newColumn < 1 {
-		return ErrIllegalColumnIndex
-	}
-
-	maxColumns := maxInt(oldColumn, newColumn)
-
-	fh, err := xopen.Ropen(file)
-	if err != nil {
-		return fmt.Errorf("unikmer: %s", err)
-	}
-	defer func() {
-		fh.Close()
-	}()
-
-	m := make(map[uint32]uint32, 1024)
-
-	n := maxColumns + 1
-
-	oldColumn--
-	newColumn--
-
-	items := make([]string, n)
-	scanner := bufio.NewScanner(fh)
-	var from, to int
-	for scanner.Scan() {
-		stringSplitN(scanner.Text(), "\t", n, &items)
-		if len(items) < n {
-			continue
-		}
-		from, err = strconv.Atoi(items[oldColumn])
-		if err != nil {
-			continue
-		}
-		to, err = strconv.Atoi(items[newColumn])
-		if err != nil {
-			continue
-		}
-
-		m[uint32(from)] = uint32(to)
-	}
-	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("unikmer: %s", err)
-	}
-
-	t.MergeNodes = m
-	t.hasMergeNodes = true
-	return nil
-}
-
-// LoadDeletedNodesFromNCBI loads deleted nodes from NCBI delnodes.dmp.
-func (t *Taxonomy) LoadDeletedNodesFromNCBI(file string) error {
-	return t.LoadDeletedNodes(file, 1)
-}
-
-// LoadDeletedNodes loads deleted nodes.
-func (t *Taxonomy) LoadDeletedNodes(file string, column int) error {
-	if column < 1 {
-		return ErrIllegalColumnIndex
-	}
-
-	fh, err := xopen.Ropen(file)
-	if err != nil {
-		return fmt.Errorf("unikmer: %s", err)
-	}
-	defer func() {
-		fh.Close()
-	}()
-
-	m := make(map[uint32]struct{}, 1024)
-
-	n := column + 1
-	column--
-	items := make([]string, n)
-	scanner := bufio.NewScanner(fh)
-	var id int
-	for scanner.Scan() {
-		stringSplitN(scanner.Text(), "\t", n, &items)
-		if len(items) < n {
-			continue
-		}
-		id, err = strconv.Atoi(items[column])
-		if err != nil {
-			continue
-		}
-
-		m[uint32(id)] = struct{}{}
-	}
-	if err := scanner.Err(); err != nil {
-		return fmt.Errorf("unikmer: %s", err)
-	}
-
-	t.DelNodes = m
-	t.hasDelNodes = true
-	return nil
-}
-
-// MaxTaxid returns maximum taxid
-func (t *Taxonomy) MaxTaxid() uint32 {
-	return t.maxTaxid
-}
-
-// CacheLCA tells to cache every LCA query result
-func (t *Taxonomy) CacheLCA() {
-	t.cacheLCA = true
-}
-
-// LCA returns the Lowest Common Ancestor of two nodes, 0 for unknown taxid.
-func (t *Taxonomy) LCA(a uint32, b uint32) uint32 {
-	if a == 0 || b == 0 {
-		return 0
-	}
-	if a == b {
-		return a
-	}
-
-	// check cache
-	var ok bool
-
-	var query uint64
-	var tmp interface{}
-	if t.cacheLCA {
-		query = pack2uint32(a, b)
-
-		tmp, ok = t.lcaCache.Load(query)
-		if ok {
-			return tmp.(uint32)
-		}
-	}
-
-	mA := make(map[uint32]struct{}, 16)
-
-	var child, parent, newTaxid uint32
-	var flag bool
-
-	child = a
-	for {
-		parent, ok = t.Nodes[child]
-		if !ok {
-			flag = false
-			if t.hasMergeNodes { // merged?
-				if newTaxid, ok = t.MergeNodes[child]; ok { // merged
-					child = newTaxid // update child
-
-					parent, ok = t.Nodes[child]
-					if ok {
-						flag = true
-					}
-				}
-			}
-
-			if !flag {
-				if t.cacheLCA {
-					t.lcaCache.Store(query, uint32(0))
-				}
-				return 0
-			}
-		}
-		if parent == child { // root
-			mA[parent] = struct{}{}
-			break
-		}
-		if parent == b { // b is ancestor of a
-			if t.cacheLCA {
-				t.lcaCache.Store(query, b)
-			}
-			return b
-		}
-		mA[parent] = struct{}{}
-
-		child = parent
-	}
-
-	child = b
-	for {
-		parent, ok = t.Nodes[child]
-		if !ok {
-			flag = false
-			if t.hasMergeNodes { // merged?
-				if newTaxid, ok = t.MergeNodes[child]; ok { // merged
-					child = newTaxid // update child
-
-					parent, ok = t.Nodes[child]
-					if ok {
-						flag = true
-					}
-				}
-			}
-
-			if !flag {
-				if t.cacheLCA {
-					t.lcaCache.Store(query, uint32(0))
-				}
-				return 0
-			}
-		}
-
-		if parent == child { // root
-			break
-		}
-		if parent == a { // a is ancestor of b
-			if t.cacheLCA {
-				t.lcaCache.Store(query, a)
-			}
-			return a
-		}
-		if _, ok = mA[parent]; ok {
-			if t.cacheLCA {
-				t.lcaCache.Store(query, parent)
-			}
-			return parent
-		}
-
-		child = parent
-	}
-	return t.rootNode
-}
-
-// LineageNames returns nodes' names of the the complete lineage.
-func (t *Taxonomy) LineageNames(taxid uint32) []string {
-	taxids := t.LineageTaxIds(taxid)
-	if taxids == nil {
-		return nil
-	}
-
-	if !t.hasNames {
-		panic(ErrNamesNotLoaded)
-	}
-
-	names := make([]string, len(taxids))
-	for i, tax := range taxids {
-		names[i] = t.Names[tax]
-	}
-	return names
-}
-
-// LineageTaxIds returns nodes' taxid of the the complete lineage.
-func (t *Taxonomy) LineageTaxIds(taxid uint32) []uint32 {
-	var child, parent, newtaxid uint32
-	var ok bool
-
-	child = taxid
-	list := make([]uint32, 0, 16)
-	for {
-		parent, ok = t.Nodes[child]
-		if !ok { // taxid not found
-			// check if it was deleted
-			if _, ok = t.DelNodes[child]; ok {
-				return nil
-			}
-			// check if it was merged
-			if newtaxid, ok = t.MergeNodes[child]; ok {
-				child = newtaxid
-				parent = t.Nodes[child]
-			} else { // not found
-				return nil
-			}
-		}
-
-		list = append(list, child)
-
-		if parent == 1 {
-			break
-		}
-		child = parent
-	}
-
-	// reversing
-	for i, j := 0, len(list)-1; i < j; i, j = i+1, j-1 {
-		list[i], list[j] = list[j], list[i]
-	}
-
-	return list
-}
-
-func pack2uint32(a uint32, b uint32) uint64 {
-	if a < b {
-		return (uint64(a) << 32) | uint64(b)
-	}
-	return (uint64(b) << 32) | uint64(a)
-}
-
-func minInt(a int, vals ...int) int {
-	min := a
-	for _, v := range vals {
-		if v < min {
-			min = v
-		}
-	}
-	return min
-}
-
-func maxInt(a int, vals ...int) int {
-	min := a
-	for _, v := range vals {
-		if v > min {
-			min = v
-		}
-	}
-	return min
-}
-
-func stringSplitN(s string, sep string, n int, a *[]string) {
-	if a == nil {
-		tmp := make([]string, n)
-		a = &tmp
-	}
-
-	n--
-	i := 0
-	for i < n {
-		m := strings.Index(s, sep)
-		if m < 0 {
-			break
-		}
-		(*a)[i] = s[:m]
-		s = s[m+len(sep):]
-		i++
-	}
-	(*a)[i] = s
-
-	(*a) = (*a)[:i+1]
-}
diff -pruN 0.18.8-1/taxonomy_test.go 0.19.0-1/taxonomy_test.go
--- 0.18.8-1/taxonomy_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/taxonomy_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,44 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"testing"
-)
-
-func TestPackTwoTaxids(t *testing.T) {
-	type Test struct {
-		a, b uint32
-		c    uint64
-	}
-	tests := []Test{
-		{0, 0, 0},
-		{1, 1, 1<<32 + 1},
-		{2, 1, 1<<32 + 2},
-	}
-
-	for _, test := range tests {
-		c := pack2uint32(test.a, test.b)
-		if c != test.c {
-			t.Errorf("pack2uint32 error: %d != %d ", c, test.c)
-		}
-	}
-}
diff -pruN 0.18.8-1/unikmer/cmd/autocomplete.go 0.19.0-1/unikmer/cmd/autocomplete.go
--- 0.18.8-1/unikmer/cmd/autocomplete.go	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/autocomplete.go	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,99 @@
+// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+
+	homedir "github.com/mitchellh/go-homedir"
+	"github.com/shenwei356/util/pathutil"
+	"github.com/spf13/cobra"
+)
+
+// genautocompleteCmd represents the fq2fa command
+var genautocompleteCmd = &cobra.Command{
+	Use:   "autocompletion",
+	Short: "Generate shell autocompletion script (bash|zsh|fish|powershell)",
+	Long: `Generate shell autocompletion script
+
+Supported shell: bash|zsh|fish|powershell
+
+Bash:
+
+    # generate completion shell
+    unikmer autocompletion --shell bash
+
+    # configure if never did.
+    # install bash-completion if the "complete" command is not found.
+    echo "for bcfile in ~/.bash_completion.d/* ; do source \$bcfile; done" >> ~/.bash_completion
+    echo "source ~/.bash_completion" >> ~/.bashrc
+
+Zsh:
+
+    # generate completion shell
+    unikmer autocompletion --shell zsh --file ~/.zfunc/_unikmer
+
+    # configure if never did
+    echo 'fpath=( ~/.zfunc "${fpath[@]}" )' >> ~/.zshrc
+    echo "autoload -U compinit; compinit" >> ~/.zshrc
+
+fish:
+
+    unikmer autocompletion --shell fish --file ~/.config/fish/completions/unikmer.fish
+
+`,
+	Run: func(cmd *cobra.Command, args []string) {
+		outfile := getFlagString(cmd, "file")
+		shell := getFlagString(cmd, "shell")
+
+		dir := filepath.Dir(outfile)
+		ok, err := pathutil.DirExists(dir)
+		checkError(err)
+		if !ok {
+			os.MkdirAll(dir, 0744)
+		}
+
+		switch shell {
+		case "bash":
+			checkError(cmd.Root().GenBashCompletionFile(outfile))
+		case "zsh":
+			checkError(cmd.Root().GenZshCompletionFile(outfile))
+		case "fish":
+			checkError(cmd.Root().GenFishCompletionFile(outfile, true))
+		case "powershell":
+			checkError(cmd.Root().GenPowerShellCompletionFile(outfile))
+		default:
+			checkError(fmt.Errorf("unsupported shell: %s", shell))
+		}
+
+		log.Infof("%s completion file for unikmer saved to %s", shell, outfile)
+	},
+}
+
+func init() {
+	RootCmd.AddCommand(genautocompleteCmd)
+	defaultCompletionFile, err := homedir.Expand("~/.bash_completion.d/unikmer.sh")
+	checkError(err)
+	genautocompleteCmd.Flags().StringP("file", "", defaultCompletionFile, "autocompletion file")
+	genautocompleteCmd.Flags().StringP("shell", "", "bash", "autocompletion type (bash|zsh|fish|powershell)")
+}
diff -pruN 0.18.8-1/unikmer/cmd/common.go 0.19.0-1/unikmer/cmd/common.go
--- 0.18.8-1/unikmer/cmd/common.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/common.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,9 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts/sortutil"
 )
@@ -101,7 +103,7 @@ Tips:
 			}
 		}
 
-		var taxondb *unikmer.Taxonomy
+		var taxondb *taxdump.Taxonomy
 
 		var mt map[uint64]uint32 // kmer -> taxid
 
@@ -109,7 +111,7 @@ Tips:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var k int = -1
 		var canonical bool
 		var hashed bool
@@ -157,7 +159,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if !reader.IsSorted() {
@@ -196,7 +198,7 @@ Tips:
 			}()
 		}
 
-		var reader *unikmer.Reader
+		var reader *unik.Reader
 		for i, file := range files {
 			if opt.Verbose {
 				log.Infof("processing file (%d/%d): %s", i+1, nfiles, file)
@@ -211,7 +213,7 @@ Tips:
 				var taxid, lca uint32
 				var ok bool
 
-				reader, err = unikmer.NewReader(infh)
+				reader, err = unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if firstFile {
@@ -308,18 +310,18 @@ Tips:
 		}()
 
 		var mode uint32
-		mode |= unikmer.UnikSorted
+		mode |= unik.UnikSorted
 		if canonical {
-			mode |= unikmer.UnikCanonical
+			mode |= unik.UnikCanonical
 		}
 		if hasTaxid || hasMixTaxid {
-			mode |= unikmer.UnikIncludeTaxID
+			mode |= unik.UnikIncludeTaxID
 		}
 		if hashed {
-			mode |= unikmer.UnikHashed
+			mode |= unik.UnikHashed
 		}
 
-		writer, err := unikmer.NewWriter(outfh, k, mode)
+		writer, err := unik.NewWriter(outfh, k, mode)
 		checkError(errors.Wrap(err, outFile))
 		writer.SetMaxTaxid(opt.MaxTaxid) // follow taxondb
 
@@ -337,7 +339,7 @@ Tips:
 			log.Infof("no shared k-mers found")
 		}
 
-		// sort.Sort(unikmer.CodeSlice(codes))
+		// sort.Sort(kmers.CodeSlice(codes))
 		sortutil.Uint64s(codes)
 
 		if hasTaxid || hasMixTaxid {
diff -pruN 0.18.8-1/unikmer/cmd/concat.go 0.19.0-1/unikmer/cmd/concat.go
--- 0.18.8-1/unikmer/cmd/concat.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/concat.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,8 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -83,11 +84,11 @@ Attentions:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -107,7 +108,7 @@ Attentions:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -119,20 +120,20 @@ Attentions:
 
 					var mode uint32
 					if sortedKmers { // || (len(files) == 1 && reader.IsSorted()) {
-						mode |= unikmer.UnikSorted
+						mode |= unik.UnikSorted
 					} else if opt.Compact && !hashed {
-						mode |= unikmer.UnikCompact
+						mode |= unik.UnikCompact
 					}
 					if canonical {
-						mode |= unikmer.UnikCanonical
+						mode |= unik.UnikCanonical
 					}
 					if hasTaxid && !hasGlobalTaxid {
-						mode |= unikmer.UnikIncludeTaxID
+						mode |= unik.UnikIncludeTaxID
 					}
 					if hashed {
-						mode |= unikmer.UnikHashed
+						mode |= unik.UnikHashed
 					}
-					writer, err = unikmer.NewWriter(outfh, k, mode)
+					writer, err = unik.NewWriter(outfh, k, mode)
 					checkError(err)
 					writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 					if hasGlobalTaxid {
diff -pruN 0.18.8-1/unikmer/cmd/count.go 0.19.0-1/unikmer/cmd/count.go
--- 0.18.8-1/unikmer/cmd/count.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/count.go	2022-04-25 12:44:35.000000000 +0000
@@ -29,7 +29,10 @@ import (
 	"github.com/pkg/errors"
 	"github.com/shenwei356/bio/seq"
 	"github.com/shenwei356/bio/seqio/fastx"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/sketches"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts/sortutil"
 )
@@ -40,13 +43,13 @@ var countCmd = &cobra.Command{
 	Long: `Generate k-mers (sketch) from FASTA/Q sequences
 
 K-mer:
-  1. K-mer code (k<=32):
-  2. Hased k-mer (ntHash):
+  1. K-mer code (k<=32)
+  2. Hashed k-mer (ntHash)
 
 K-mer sketches:
   1. Scaled MinHash
   2. Minimizer
-  3. Syncmer
+  3. Closed Syncmer
 
 `,
 	Run: func(cmd *cobra.Command, args []string) {
@@ -55,6 +58,20 @@ K-mer sketches:
 
 		var err error
 
+		reSeqNameStrs := getFlagStringSlice(cmd, "seq-name-filter")
+		reSeqNames := make([]*regexp.Regexp, 0, len(reSeqNameStrs))
+		for _, kw := range reSeqNameStrs {
+			if !reIgnoreCase.MatchString(kw) {
+				kw = reIgnoreCaseStr + kw
+			}
+			re, err := regexp.Compile(kw)
+			if err != nil {
+				checkError(errors.Wrapf(err, "failed to parse regular expression for matching sequence header: %s", kw))
+			}
+			reSeqNames = append(reSeqNames, re)
+		}
+		filterNames := len(reSeqNames) > 0
+
 		outFile := getFlagString(cmd, "out-prefix")
 		k := getFlagPositiveInt(cmd, "kmer-len")
 		canonical := getFlagBool(cmd, "canonical")
@@ -199,12 +216,12 @@ K-mer sketches:
 			log.Infof("set global taxid: %d", taxid)
 		}
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 		var mode uint32
 		var n uint64
 
 		var m map[uint64]struct{}
-		var taxondb *unikmer.Taxonomy
+		var taxondb *taxdump.Taxonomy
 		var mt map[uint64]uint32
 
 		// could use bloom filter
@@ -213,18 +230,18 @@ K-mer sketches:
 
 		if linear {
 			if opt.Compact && !hashed {
-				mode |= unikmer.UnikCompact
+				mode |= unik.UnikCompact
 			}
 			if canonical {
-				mode |= unikmer.UnikCanonical
+				mode |= unik.UnikCanonical
 			}
 			if parseTaxid {
-				mode |= unikmer.UnikIncludeTaxID
+				mode |= unik.UnikIncludeTaxID
 			}
 			if hashed {
-				mode |= unikmer.UnikHashed
+				mode |= unik.UnikHashed
 			}
-			writer, err = unikmer.NewWriter(outfh, k, mode)
+			writer, err = unik.NewWriter(outfh, k, mode)
 			checkError(errors.Wrap(err, outFile))
 			writer.SetMaxTaxid(opt.MaxTaxid)
 			if setGlobalTaxid {
@@ -256,8 +273,11 @@ K-mer sketches:
 		var mark bool
 		var nseq int64
 		var code uint64
-		var iter *unikmer.Iterator
-		var sketch *unikmer.Sketch
+		var iter *sketches.Iterator
+		var sketch *sketches.Sketch
+		var ignoreSeq bool
+		var re *regexp.Regexp
+
 		for _, file := range files {
 			if opt.Verbose {
 				log.Infof("reading sequence file: %s", file)
@@ -274,17 +294,30 @@ K-mer sketches:
 					break
 				}
 
+				if filterNames {
+					ignoreSeq = false
+					for _, re = range reSeqNames {
+						if re.Match(record.Name) {
+							ignoreSeq = true
+							break
+						}
+					}
+					if ignoreSeq {
+						continue
+					}
+				}
+
 				if syncmer {
-					sketch, err = unikmer.NewSyncmerSketch(record.Seq, k, syncmerS, circular)
+					sketch, err = sketches.NewSyncmerSketch(record.Seq, k, syncmerS, circular)
 				} else if minimizer {
-					sketch, err = unikmer.NewMinimizerSketch(record.Seq, k, minimizerW, circular)
+					sketch, err = sketches.NewMinimizerSketch(record.Seq, k, minimizerW, circular)
 				} else if hashed {
-					iter, err = unikmer.NewHashIterator(record.Seq, k, canonical, circular)
+					iter, err = sketches.NewHashIterator(record.Seq, k, canonical, circular)
 				} else {
-					iter, err = unikmer.NewKmerIterator(record.Seq, k, canonical, circular)
+					iter, err = sketches.NewKmerIterator(record.Seq, k, canonical, circular)
 				}
 				if err != nil {
-					if err == unikmer.ErrShortSeq {
+					if err == sketches.ErrShortSeq {
 						if opt.Verbose && moreVerbose {
 							log.Infof("ignore short seq: %s", record.Name)
 						}
@@ -410,20 +443,20 @@ K-mer sketches:
 		}
 
 		if sortKmers {
-			mode |= unikmer.UnikSorted
+			mode |= unik.UnikSorted
 		} else if opt.Compact && !hashed {
-			mode |= unikmer.UnikCompact
+			mode |= unik.UnikCompact
 		}
 		if canonical {
-			mode |= unikmer.UnikCanonical
+			mode |= unik.UnikCanonical
 		}
 		if parseTaxid {
-			mode |= unikmer.UnikIncludeTaxID
+			mode |= unik.UnikIncludeTaxID
 		}
 		if hashed {
-			mode |= unikmer.UnikHashed
+			mode |= unik.UnikHashed
 		}
-		writer, err = unikmer.NewWriter(outfh, k, mode)
+		writer, err = unik.NewWriter(outfh, k, mode)
 		checkError(errors.Wrap(err, outFile))
 		writer.SetMaxTaxid(opt.MaxTaxid)
 		if setGlobalTaxid {
@@ -540,7 +573,7 @@ K-mer sketches:
 			if opt.Verbose {
 				log.Infof("sorting %d k-mers", len(codes))
 			}
-			// sort.Sort(unikmer.CodeSlice(codes))
+			// sort.Sort(sketches.CodeSlice(codes))
 			sortutil.Uint64s(codes)
 			if opt.Verbose {
 				log.Infof("done sorting")
@@ -567,6 +600,8 @@ K-mer sketches:
 func init() {
 	RootCmd.AddCommand(countCmd)
 
+	countCmd.Flags().StringSliceP("seq-name-filter", "B", []string{}, `list of regular expressions for filtering out sequences by header/name, case ignored.`)
+
 	countCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
 	countCmd.Flags().IntP("kmer-len", "k", 0, "k-mer length")
 	countCmd.Flags().BoolP("canonical", "K", false, "only keep the canonical k-mers")
@@ -574,15 +609,21 @@ func init() {
 	countCmd.Flags().Uint32P("taxid", "t", 0, "global taxid")
 	countCmd.Flags().BoolP("parse-taxid", "T", false, `parse taxid from FASTA/Q header`)
 	countCmd.Flags().StringP("parse-taxid-regexp", "r", "", `regular expression for passing taxid`)
-	countCmd.Flags().BoolP("repeated", "d", false, `only count duplicated k-mers, for removing singleton in FASTQ`)
-	countCmd.Flags().BoolP("unique", "u", false, `only count unique k-mers, which are not duplicated`)
+	countCmd.Flags().BoolP("repeated", "d", false, `only count duplicate k-mers, for removing singleton in FASTQ`)
+	countCmd.Flags().BoolP("unique", "u", false, `only count unique k-mers, which are not duplicate`)
 	countCmd.Flags().BoolP("more-verbose", "V", false, `print extra verbose information`)
 	countCmd.Flags().BoolP("hash", "H", false, `save hash of k-mer, automatically on for k>32. This flag overides global flag -c/--compact`)
 	countCmd.Flags().BoolP("circular", "", false, "circular genome")
 
 	countCmd.Flags().IntP("scale", "D", 1, `scale/down-sample factor`)
 	countCmd.Flags().IntP("minimizer-w", "W", 0, `minimizer window size`)
-	countCmd.Flags().IntP("syncmer-s", "S", 0, `bounded syncmer length`)
+	countCmd.Flags().IntP("syncmer-s", "S", 0, `closed syncmer length`)
 
 	countCmd.Flags().BoolP("linear", "l", false, `output k-mers in linear order`)
+
+	countCmd.SetUsageTemplate(usageTemplate("-K -k <k> -u -s [-t <taxid>] <seq files> -o <out prefix>"))
+
 }
+
+var reIgnoreCaseStr = "(?i)"
+var reIgnoreCase = regexp.MustCompile(`\(\?i\)`)
diff -pruN 0.18.8-1/unikmer/cmd/decode.go 0.19.0-1/unikmer/cmd/decode.go
--- 0.18.8-1/unikmer/cmd/decode.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/decode.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,8 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/breader"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/kmers"
+
 	"github.com/spf13/cobra"
 )
 
@@ -95,14 +96,14 @@ var decodeCmd = &cobra.Command{
 						checkError(fmt.Errorf("encode kmer should be non-negative integer: %s", line))
 					}
 
-					if code < 0 {
-						checkError(fmt.Errorf("encode kmer should be non-negative integer: %d", code))
-					}
-					if code > unikmer.MaxCode[k] {
-						checkError(fmt.Errorf("encode integer overflows for k=%d (max: %d): %d", k, unikmer.MaxCode[k], code))
+					// if code < 0 {
+					// 	checkError(fmt.Errorf("encode kmer should be non-negative integer: %d", code))
+					// }
+					if code > kmers.MaxCode[k] {
+						checkError(fmt.Errorf("encode integer overflows for k=%d (max: %d): %d", k, kmers.MaxCode[k], code))
 					}
 
-					kmer = unikmer.Decode(code, k)
+					kmer = kmers.Decode(code, k)
 					if err != nil {
 						checkError(fmt.Errorf("fail to decode '%s': %s", line, err))
 					}
diff -pruN 0.18.8-1/unikmer/cmd/diff.go 0.19.0-1/unikmer/cmd/diff.go
--- 0.18.8-1/unikmer/cmd/diff.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/diff.go	2022-04-25 12:44:35.000000000 +0000
@@ -28,7 +28,9 @@ import (
 	"sync"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts/sortutil"
 )
@@ -42,10 +44,10 @@ Attentions:
   0. The first file should be sorted.
   1. The 'canonical/scaled/hashed' flags of all files should be consistent.
   2. By default taxids in the 2nd and later files are ignored.
-  3. You can switch on flag -t/--compare-taxid , and input
+  3. You can switch on flag -t/--compare-taxid, and input
      files should ALL have or don't have taxid information.
      A same k-mer found but query taxid equals to target taxid,
-     or query taxid is ancester of target taxid, this k-mer remains
+     or query taxid is ancester of target taxid, this k-mer remains.
 
 Tips:
   1. Increasing threads number (-j/--threads) to accelerate computation
@@ -79,11 +81,11 @@ Tips:
 
 		threads := opt.NumCPUs
 
-		mc := make([]unikmer.CodeTaxid, 0, mapInitSize)
+		mc := make([]CodeTaxid, 0, mapInitSize)
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -92,7 +94,7 @@ Tips:
 		var hasTaxid bool
 		var ok bool
 
-		var taxondb *unikmer.Taxonomy
+		var taxondb *taxdump.Taxonomy
 
 		// -----------------------------------------------------------------------
 
@@ -106,7 +108,7 @@ Tips:
 		infh, r, _, err = inStream(file)
 		checkError(err)
 
-		reader, err := unikmer.NewReader(infh)
+		reader, err := unik.NewReader(infh)
 		checkError(errors.Wrap(err, file))
 
 		if !reader.IsSorted() { // query is sorted
@@ -139,7 +141,7 @@ Tips:
 				checkError(errors.Wrap(err, file))
 			}
 
-			mc = append(mc, unikmer.CodeTaxid{Code: code, Taxid: taxid})
+			mc = append(mc, CodeTaxid{Code: code, Taxid: taxid})
 		}
 		n0 = len(mc)
 
@@ -169,21 +171,21 @@ Tips:
 
 			var mode uint32
 			if sortKmers {
-				mode |= unikmer.UnikSorted
+				mode |= unik.UnikSorted
 			} else if opt.Compact && !hashed {
-				mode |= unikmer.UnikCompact
+				mode |= unik.UnikCompact
 			}
 			if canonical {
-				mode |= unikmer.UnikCanonical
+				mode |= unik.UnikCanonical
 			}
 			if hasTaxid {
-				mode |= unikmer.UnikIncludeTaxID
+				mode |= unik.UnikIncludeTaxID
 			}
 			if hashed {
-				mode |= unikmer.UnikHashed
+				mode |= unik.UnikHashed
 			}
 
-			writer, err := unikmer.NewWriter(outfh, k, mode)
+			writer, err := unik.NewWriter(outfh, k, mode)
 			checkError(errors.Wrap(err, outFile))
 			writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 
@@ -228,7 +230,7 @@ Tips:
 
 		maps := make(map[int]map[uint64]uint32, threads)
 
-		mapsc := make(map[int][]unikmer.CodeTaxid, threads)
+		mapsc := make(map[int][]CodeTaxid, threads)
 		mapsc[0] = mc
 
 		if threads > 1 {
@@ -240,7 +242,7 @@ Tips:
 			type iMap struct {
 				i  int
 				m  map[uint64]uint32
-				mc []unikmer.CodeTaxid
+				mc []CodeTaxid
 			}
 			ch := make(chan iMap, threads)
 			doneClone := make(chan int)
@@ -253,10 +255,8 @@ Tips:
 			for i := 1; i < threads; i++ {
 				wg.Add(1)
 				go func(i int) {
-					mc1 := make([]unikmer.CodeTaxid, len(mc))
-					for i, ct := range mc {
-						mc1[i] = ct
-					}
+					mc1 := make([]CodeTaxid, len(mc))
+					copy(mc1, mc)
 					ch <- iMap{i: i, mc: mc1}
 					wg.Done()
 				}(i)
@@ -297,7 +297,7 @@ Tips:
 				var file string
 				var infh *bufio.Reader
 				var r *os.File
-				var reader *unikmer.Reader
+				var reader *unik.Reader
 				var ok bool
 				var sorted bool
 				var m1 map[uint64]uint32
@@ -322,7 +322,7 @@ Tips:
 					infh, r, _, err = inStream(file)
 					checkError(err)
 
-					reader, err = unikmer.NewReader(infh)
+					reader, err = unik.NewReader(infh)
 					checkError(errors.Wrap(err, file))
 
 					checkCompatibility(reader0, reader, file)
@@ -376,7 +376,7 @@ Tips:
 							return
 						}
 					} else {
-						mc2 := make([]unikmer.CodeTaxid, 0, len(mc1))
+						mc2 := make([]CodeTaxid, 0, len(mc1))
 						var qCode, code uint64
 						var qtaxid, taxid uint32
 						ii := 0
@@ -544,21 +544,21 @@ Tips:
 
 		var mode uint32
 		if sortKmers {
-			mode |= unikmer.UnikSorted
+			mode |= unik.UnikSorted
 		} else if opt.Compact && !hashed {
-			mode |= unikmer.UnikCompact
+			mode |= unik.UnikCompact
 		}
 		if canonical {
-			mode |= unikmer.UnikCanonical
+			mode |= unik.UnikCanonical
 		}
 		if hasTaxid {
-			mode |= unikmer.UnikIncludeTaxID
+			mode |= unik.UnikIncludeTaxID
 		}
 		if hashed {
-			mode |= unikmer.UnikHashed
+			mode |= unik.UnikHashed
 		}
 
-		writer, err := unikmer.NewWriter(outfh, k, mode)
+		writer, err := unik.NewWriter(outfh, k, mode)
 		checkError(errors.Wrap(err, outFile))
 		writer.SetMaxTaxid(opt.MaxTaxid)
 
diff -pruN 0.18.8-1/unikmer/cmd/dump.go 0.19.0-1/unikmer/cmd/dump.go
--- 0.18.8-1/unikmer/cmd/dump.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/dump.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,9 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/breader"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/kmers"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 	"github.com/will-rowe/nthash"
 )
@@ -97,7 +99,7 @@ Attentions:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var m map[uint64]struct{}
 		if unique {
@@ -110,7 +112,7 @@ Attentions:
 		var data interface{}
 		var line string
 		var linebytes []byte
-		var kcode, kcodeC unikmer.KmerCode
+		var kcode, kcodeC kmers.KmerCode
 		var ok bool
 		var n int64
 
@@ -197,20 +199,20 @@ Attentions:
 					if writer == nil {
 						var mode uint32
 						if sortedKmers {
-							mode |= unikmer.UnikSorted
+							mode |= unik.UnikSorted
 						} else if opt.Compact && !hashed {
-							mode |= unikmer.UnikCompact
+							mode |= unik.UnikCompact
 						}
 						if canonical || canonicalOnly {
-							mode |= unikmer.UnikCanonical
+							mode |= unik.UnikCanonical
 						}
 						if includeTaxid {
-							mode |= unikmer.UnikIncludeTaxID
+							mode |= unik.UnikIncludeTaxID
 						}
 						if hashed || hashedAlready {
-							mode |= unikmer.UnikHashed
+							mode |= unik.UnikHashed
 						}
-						writer, err = unikmer.NewWriter(outfh, k, mode)
+						writer, err = unik.NewWriter(outfh, k, mode)
 						if err != nil {
 							checkError(errors.Wrap(err, outFile))
 						}
@@ -277,7 +279,7 @@ Attentions:
 						continue
 					}
 
-					kcode, err = unikmer.NewKmerCode([]byte(line))
+					kcode, err = kmers.NewKmerCode([]byte(line))
 					if err != nil {
 						checkError(fmt.Errorf("fail to encode '%s': %s", line, err))
 					}
@@ -323,7 +325,7 @@ func init() {
 	RootCmd.AddCommand(dumpCmd)
 
 	dumpCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
-	dumpCmd.Flags().BoolP("unique", "u", false, `remove duplicated k-mers`)
+	dumpCmd.Flags().BoolP("unique", "u", false, `remove duplicate k-mers`)
 	dumpCmd.Flags().BoolP("canonical", "K", false, "save the canonical k-mers")
 	dumpCmd.Flags().BoolP("canonical-only", "O", false, "only save the canonical k-mers. This flag overides -K/--canonical")
 	dumpCmd.Flags().BoolP("sorted", "s", false, "input k-mers are sorted")
diff -pruN 0.18.8-1/unikmer/cmd/encode.go 0.19.0-1/unikmer/cmd/encode.go
--- 0.18.8-1/unikmer/cmd/encode.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/encode.go	2022-04-25 12:44:35.000000000 +0000
@@ -26,7 +26,8 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/breader"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/kmers"
+
 	"github.com/spf13/cobra"
 	"github.com/will-rowe/nthash"
 )
@@ -76,7 +77,7 @@ var encodeCmd = &cobra.Command{
 		var data interface{}
 		var line string
 		var linebytes []byte
-		var kcode unikmer.KmerCode
+		var kcode kmers.KmerCode
 		var hasher *nthash.NTHi
 		var hash uint64
 		for _, file := range files {
@@ -108,7 +109,7 @@ var encodeCmd = &cobra.Command{
 						continue
 					}
 
-					kcode, err = unikmer.NewKmerCode([]byte(line))
+					kcode, err = kmers.NewKmerCode([]byte(line))
 					if err != nil {
 						checkError(fmt.Errorf("fail to encode '%s': %s", line, err))
 					}
diff -pruN 0.18.8-1/unikmer/cmd/filter.go 0.19.0-1/unikmer/cmd/filter.go
--- 0.18.8-1/unikmer/cmd/filter.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/filter.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,14 +27,15 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
 var filterCmd = &cobra.Command{
 	Use:   "filter",
-	Short: "Filter low-complexity k-mers (experimental)",
-	Long: `Filter low-complexity k-mers (experimental)
+	Short: "Filter out low-complexity k-mers (experimental)",
+	Long: `Filter out low-complexity k-mers (experimental)
 
 Attentions:
   1. This command only detects single base repeat now.
@@ -82,11 +83,11 @@ Attentions:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -105,7 +106,7 @@ Attentions:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -118,7 +119,7 @@ Attentions:
 
 					scores = make([]int, k)
 
-					writer, err = unikmer.NewWriter(outfh, k, reader.Flag)
+					writer, err = unik.NewWriter(outfh, k, reader.Flag)
 					checkError(errors.Wrap(err, outFile))
 					writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 				} else {
diff -pruN 0.18.8-1/unikmer/cmd/genautocomplete.go 0.19.0-1/unikmer/cmd/genautocomplete.go
--- 0.18.8-1/unikmer/cmd/genautocomplete.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/genautocomplete.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,99 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package cmd
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-
-	homedir "github.com/mitchellh/go-homedir"
-	"github.com/shenwei356/util/pathutil"
-	"github.com/spf13/cobra"
-)
-
-// genautocompleteCmd represents the fq2fa command
-var genautocompleteCmd = &cobra.Command{
-	Use:   "genautocomplete",
-	Short: "generate shell autocompletion script (bash|zsh|fish|powershell)",
-	Long: `generate shell autocompletion script
-
-Supported shell: bash|zsh|fish|powershell
-
-Bash:
-
-    # generate completion shell
-    unikmer genautocomplete --shell bash
-
-    # configure if never did.
-    # install bash-completion if the "complete" command is not found.
-    echo "for bcfile in ~/.bash_completion.d/* ; do source \$bcfile; done" >> ~/.bash_completion
-    echo "source ~/.bash_completion" >> ~/.bashrc
-
-Zsh:
-
-    # generate completion shell
-    unikmer genautocomplete --shell zsh --file ~/.zfunc/_unikmer
-
-    # configure if never did
-    echo 'fpath=( ~/.zfunc "${fpath[@]}" )' >> ~/.zshrc
-    echo "autoload -U compinit; compinit" >> ~/.zshrc
-
-fish:
-
-    unikmer genautocomplete --shell fish --file ~/.config/fish/completions/unikmer.fish
-
-`,
-	Run: func(cmd *cobra.Command, args []string) {
-		outfile := getFlagString(cmd, "file")
-		shell := getFlagString(cmd, "shell")
-
-		dir := filepath.Dir(outfile)
-		ok, err := pathutil.DirExists(dir)
-		checkError(err)
-		if !ok {
-			os.MkdirAll(dir, 0744)
-		}
-
-		switch shell {
-		case "bash":
-			checkError(cmd.Root().GenBashCompletionFile(outfile))
-		case "zsh":
-			checkError(cmd.Root().GenZshCompletionFile(outfile))
-		case "fish":
-			checkError(cmd.Root().GenFishCompletionFile(outfile, true))
-		case "powershell":
-			checkError(cmd.Root().GenPowerShellCompletionFile(outfile))
-		default:
-			checkError(fmt.Errorf("unsupported shell: %s", shell))
-		}
-
-		log.Infof("%s completion file for unikmer saved to %s", shell, outfile)
-	},
-}
-
-func init() {
-	RootCmd.AddCommand(genautocompleteCmd)
-	defaultCompletionFile, err := homedir.Expand("~/.bash_completion.d/unikmer.sh")
-	checkError(err)
-	genautocompleteCmd.Flags().StringP("file", "", defaultCompletionFile, "autocompletion file")
-	genautocompleteCmd.Flags().StringP("shell", "", "bash", "autocompletion type (bash|zsh|fish|powershell)")
-}
diff -pruN 0.18.8-1/unikmer/cmd/grep.go 0.19.0-1/unikmer/cmd/grep.go
--- 0.18.8-1/unikmer/cmd/grep.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/grep.go	2022-04-25 12:44:35.000000000 +0000
@@ -31,7 +31,9 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/breader"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/kmers"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/shenwei356/util/pathutil"
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts"
@@ -163,7 +165,7 @@ Tips:
 		}
 
 		// encode k-mers or parse taxids
-		var kcode unikmer.KmerCode
+		var kcode kmers.KmerCode
 		var mer []byte
 		var _queries [][]byte
 		var val uint64
@@ -191,7 +193,7 @@ Tips:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var flag int
 		var canonical bool
 		var hashed bool
@@ -214,7 +216,7 @@ Tips:
 					checkError(err)
 					defer r.Close()
 
-					reader, err := unikmer.NewReader(infh)
+					reader, err := unik.NewReader(infh)
 					checkError(errors.Wrap(err, file))
 
 					canonical = reader.IsCanonical()
@@ -250,7 +252,7 @@ Tips:
 							continue
 						}
 						if !canonical && !hashed {
-							code = unikmer.Canonical(code, k)
+							code = kmers.Canonical(code, k)
 						}
 						m[code] = struct{}{}
 					}
@@ -314,7 +316,7 @@ Tips:
 		var outfh *bufio.Writer
 		var gw io.WriteCloser
 		var w *os.File
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 		var hasTaxid bool
 
 		if !mOutputs {
@@ -369,17 +371,17 @@ Tips:
 		tokens := make(chan int, threads)
 
 		var codes []uint64
-		var codesTaxids []unikmer.CodeTaxid
+		var codesTaxids []CodeTaxid
 		if sortKmers {
 			codes = make([]uint64, 0, mapInitSize)
-			codesTaxids = make([]unikmer.CodeTaxid, 0, mapInitSize)
+			codesTaxids = make([]CodeTaxid, 0, mapInitSize)
 		}
 
 		// read k-mers from goroutines
 		var ns int
 		var done chan int
 		var chCodes chan uint64
-		var chCodesTaxids chan unikmer.CodeTaxid
+		var chCodesTaxids chan CodeTaxid
 
 		var once sync.Once
 		chEncodeQueries := make(chan int)
@@ -387,7 +389,7 @@ Tips:
 		if !mOutputs {
 			done = make(chan int)
 			chCodes = make(chan uint64, threads)
-			chCodesTaxids = make(chan unikmer.CodeTaxid, threads)
+			chCodesTaxids = make(chan CodeTaxid, threads)
 		}
 
 		nfiles = len(files)
@@ -403,7 +405,7 @@ Tips:
 
 				var infh *bufio.Reader
 				var r *os.File
-				var reader *unikmer.Reader
+				var reader *unik.Reader
 
 				var n int
 				var _k int
@@ -423,7 +425,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err = unikmer.NewReader(infh)
+				reader, err = unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				_k = reader.K
@@ -457,7 +459,7 @@ Tips:
 						}
 					} else {
 						for _, q := range _queries {
-							kcode, err = unikmer.NewKmerCode(q)
+							kcode, err = kmers.NewKmerCode(q)
 							if err != nil {
 								checkError(fmt.Errorf("fail to encode query '%s': %s", mer, err))
 							}
@@ -493,22 +495,22 @@ Tips:
 
 						var mode uint32
 
-						mode |= unikmer.UnikCanonical // forcing using canonical
+						mode |= unik.UnikCanonical // forcing using canonical
 						if sortKmers {
-							mode |= unikmer.UnikSorted
+							mode |= unik.UnikSorted
 						} else if len(files) == 1 && reader.IsSorted() {
 							// if the only input file is already sorted, we don't have to sort again.
-							mode |= unikmer.UnikSorted
+							mode |= unik.UnikSorted
 						} else if opt.Compact && !_hashed {
-							mode |= unikmer.UnikCompact
+							mode |= unik.UnikCompact
 						}
 						if hasTaxid {
-							mode |= unikmer.UnikIncludeTaxID
+							mode |= unik.UnikIncludeTaxID
 						}
 						if _hashed {
-							mode |= unikmer.UnikHashed
+							mode |= unik.UnikHashed
 						}
-						writer, err = unikmer.NewWriter(outfh, reader.K, mode)
+						writer, err = unik.NewWriter(outfh, reader.K, mode)
 						checkError(errors.Wrap(err, outFile))
 						writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 
@@ -559,9 +561,9 @@ Tips:
 					}
 				}
 
-				var _writer *unikmer.Writer
+				var _writer *unik.Writer
 				var _codes []uint64
-				var _codesTaxids []unikmer.CodeTaxid
+				var _codesTaxids []CodeTaxid
 				var _outFile string
 
 				if mOutputs {
@@ -578,21 +580,21 @@ Tips:
 					}()
 
 					var mode uint32
-					mode |= unikmer.UnikCanonical
+					mode |= unik.UnikCanonical
 					if sortKmers {
-						mode |= unikmer.UnikSorted
+						mode |= unik.UnikSorted
 					} else if reader.IsSorted() {
-						mode |= unikmer.UnikSorted
+						mode |= unik.UnikSorted
 					} else if opt.Compact && !hashed {
-						mode |= unikmer.UnikCompact
+						mode |= unik.UnikCompact
 					}
 					if _isIncludeTaxid {
-						mode |= unikmer.UnikIncludeTaxID
+						mode |= unik.UnikIncludeTaxID
 					}
 					if _hashed {
-						mode |= unikmer.UnikHashed
+						mode |= unik.UnikHashed
 					}
-					_writer, err = unikmer.NewWriter(_outfh, reader.K, mode)
+					_writer, err = unik.NewWriter(_outfh, reader.K, mode)
 					checkError(errors.Wrap(err, _outFile))
 					_writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 					if _hasGlobalTaxid {
@@ -601,7 +603,7 @@ Tips:
 
 					if sortKmers && _mustSort {
 						if _isIncludeTaxid {
-							_codesTaxids = make([]unikmer.CodeTaxid, 0, mapInitSize)
+							_codesTaxids = make([]CodeTaxid, 0, mapInitSize)
 						} else if _hasGlobalTaxid {
 							_codes = make([]uint64, 0, mapInitSize)
 						} else {
@@ -636,7 +638,7 @@ Tips:
 							}
 						} else {
 							if !_canonical && !hashed {
-								code = unikmer.Canonical(code, _k)
+								code = kmers.Canonical(code, _k)
 							}
 							_, ok = m[code]
 						}
@@ -655,7 +657,7 @@ Tips:
 					if mOutputs {
 						if sortKmers && _mustSort {
 							if _isIncludeTaxid {
-								_codesTaxids = append(_codesTaxids, unikmer.CodeTaxid{Code: code, Taxid: taxid})
+								_codesTaxids = append(_codesTaxids, CodeTaxid{Code: code, Taxid: taxid})
 							} else {
 								_codes = append(_codes, code)
 							}
@@ -665,7 +667,7 @@ Tips:
 						}
 					} else {
 						if hasTaxid {
-							chCodesTaxids <- unikmer.CodeTaxid{Code: code, Taxid: taxid}
+							chCodesTaxids <- CodeTaxid{Code: code, Taxid: taxid}
 						} else {
 							chCodes <- code
 						}
@@ -681,8 +683,8 @@ Tips:
 						if opt.Verbose {
 							log.Infof("[file %d/%d] sorting %d k-mers", i+1, nfiles, len(_codesTaxids))
 						}
-						// sort.Sort(unikmer.CodeTaxidSlice(_codesTaxids))
-						sorts.Quicksort(unikmer.CodeTaxidSlice(_codesTaxids))
+						// sort.Sort(CodeTaxidSlice(_codesTaxids))
+						sorts.Quicksort(CodeTaxidSlice(_codesTaxids))
 					} else {
 						if opt.Verbose {
 							log.Infof("[file %d/%d] sorting %d k-mers", i+1, nfiles, len(_codes))
@@ -789,8 +791,8 @@ Tips:
 				if opt.Verbose {
 					log.Infof("sorting %d k-mers", len(codesTaxids))
 				}
-				// sort.Sort(unikmer.CodeTaxidSlice(codesTaxids))
-				sorts.Quicksort(unikmer.CodeTaxidSlice(codesTaxids))
+				// sort.Sort(CodeTaxidSlice(codesTaxids))
+				sorts.Quicksort(CodeTaxidSlice(codesTaxids))
 			} else {
 				if opt.Verbose {
 					log.Infof("sorting %d k-mers", len(codes))
@@ -903,7 +905,7 @@ func init() {
 	grepCmd.Flags().BoolP("force", "", false, "overwrite output directory")
 
 	grepCmd.Flags().BoolP("sort", "s", false, helpSort)
-	grepCmd.Flags().BoolP("unique", "u", false, `remove duplicated k-mers`)
+	grepCmd.Flags().BoolP("unique", "u", false, `remove duplicate k-mers`)
 	grepCmd.Flags().BoolP("repeated", "d", false, `only print duplicate k-mers`)
 
 }
diff -pruN 0.18.8-1/unikmer/cmd/head.go 0.19.0-1/unikmer/cmd/head.go
--- 0.18.8-1/unikmer/cmd/head.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/head.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,8 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -76,11 +77,11 @@ Attentions:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -99,7 +100,7 @@ Attentions:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -109,9 +110,9 @@ Attentions:
 
 					mode := reader.Flag
 					if hasTaxid {
-						mode |= unikmer.UnikIncludeTaxID // for multiple input files
+						mode |= unik.UnikIncludeTaxID // for multiple input files
 					}
-					writer, err = unikmer.NewWriter(outfh, k, mode)
+					writer, err = unik.NewWriter(outfh, k, mode)
 					checkError(errors.Wrap(err, outFile))
 					writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 				} else {
diff -pruN 0.18.8-1/unikmer/cmd/inter.go 0.19.0-1/unikmer/cmd/inter.go
--- 0.18.8-1/unikmer/cmd/inter.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/inter.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,9 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -72,14 +74,14 @@ Tips:
 		mixTaxid := getFlagBool(cmd, "mix-taxid")
 		var hasMixTaxid bool
 
-		var taxondb *unikmer.Taxonomy
+		var taxondb *taxdump.Taxonomy
 
-		mc := make([]unikmer.CodeTaxid, 0, mapInitSize)
+		mc := make([]CodeTaxid, 0, mapInitSize)
 		m := make([]bool, 0, mapInitSize) // marking common elements
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var k int = -1
 		var canonical bool
 		var hashed bool
@@ -130,7 +132,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if !reader.IsSorted() {
@@ -168,7 +170,7 @@ Tips:
 			}()
 		}
 
-		var reader *unikmer.Reader
+		var reader *unik.Reader
 		for i, file := range files {
 			if opt.Verbose {
 				log.Infof("processing file (%d/%d): %s", i+1, nfiles, file)
@@ -179,7 +181,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err = unikmer.NewReader(infh)
+				reader, err = unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if firstFile {
@@ -192,7 +194,7 @@ Tips:
 							checkError(errors.Wrap(err, file))
 						}
 
-						mc = append(mc, unikmer.CodeTaxid{Code: code, Taxid: taxid})
+						mc = append(mc, CodeTaxid{Code: code, Taxid: taxid})
 						m = append(m, false)
 					}
 					firstFile = false
@@ -263,7 +265,7 @@ Tips:
 					}
 				}
 
-				mc1 := make([]unikmer.CodeTaxid, 0, n)
+				mc1 := make([]CodeTaxid, 0, n)
 				n = 0
 				for ii, found := range m {
 					if found {
@@ -319,18 +321,18 @@ Tips:
 		}()
 
 		var mode uint32
-		mode |= unikmer.UnikSorted
+		mode |= unik.UnikSorted
 		if canonical {
-			mode |= unikmer.UnikCanonical
+			mode |= unik.UnikCanonical
 		}
 		if hasTaxid || hasMixTaxid {
-			mode |= unikmer.UnikIncludeTaxID
+			mode |= unik.UnikIncludeTaxID
 		}
 		if hashed {
-			mode |= unikmer.UnikHashed
+			mode |= unik.UnikHashed
 		}
 
-		writer, err := unikmer.NewWriter(outfh, k, mode)
+		writer, err := unik.NewWriter(outfh, k, mode)
 		checkError(errors.Wrap(err, outFile))
 		writer.SetMaxTaxid(opt.MaxTaxid) // follow taxondb
 
diff -pruN 0.18.8-1/unikmer/cmd/kmers.go 0.19.0-1/unikmer/cmd/kmers.go
--- 0.18.8-1/unikmer/cmd/kmers.go	1970-01-01 00:00:00.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/kmers.go	2022-04-25 12:44:35.000000000 +0000
@@ -0,0 +1,46 @@
+// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package cmd
+
+// CodeTaxid is the code-taxid pair
+type CodeTaxid struct {
+	Code uint64
+	// _     uint32 // needed? to test
+	Taxid uint32
+}
+
+// CodeTaxidSlice is a list of CodeTaxid, just for sorting
+type CodeTaxidSlice []CodeTaxid
+
+// Len return length of the slice
+func (pairs CodeTaxidSlice) Len() int {
+	return len(pairs)
+}
+
+// Swap swaps two elements
+func (pairs CodeTaxidSlice) Swap(i, j int) {
+	pairs[i], pairs[j] = pairs[j], pairs[i]
+}
+
+// Less simply compare two KmerCode
+func (pairs CodeTaxidSlice) Less(i, j int) bool {
+	return pairs[i].Code < pairs[j].Code
+}
diff -pruN 0.18.8-1/unikmer/cmd/locate.go 0.19.0-1/unikmer/cmd/locate.go
--- 0.18.8-1/unikmer/cmd/locate.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/locate.go	2022-04-25 12:44:35.000000000 +0000
@@ -25,12 +25,15 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"regexp"
 	"strings"
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/bio/seq"
 	"github.com/shenwei356/bio/seqio/fastx"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/sketches"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -54,6 +57,20 @@ Attention:
 
 		var err error
 
+		reSeqNameStrs := getFlagStringSlice(cmd, "seq-name-filter")
+		reSeqNames := make([]*regexp.Regexp, 0, len(reSeqNameStrs))
+		for _, kw := range reSeqNameStrs {
+			if !reIgnoreCase.MatchString(kw) {
+				kw = reIgnoreCaseStr + kw
+			}
+			re, err := regexp.Compile(kw)
+			if err != nil {
+				checkError(errors.Wrapf(err, "failed to parse regular expression for matching sequence header: %s", kw))
+			}
+			reSeqNames = append(reSeqNames, re)
+		}
+		filterNames := len(reSeqNames) > 0
+
 		if opt.Verbose {
 			log.Info("checking input files ...")
 		}
@@ -89,7 +106,7 @@ Attention:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var nfiles = len(files)
 		for i, file := range files {
 			if isStdin(file) {
@@ -103,7 +120,7 @@ Attention:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -132,10 +149,12 @@ Attention:
 
 		var fastxReader *fastx.Reader
 		var record *fastx.Record
-		var iter *unikmer.Iterator
+		var iter *sketches.Iterator
 		var code uint64
 		var ok bool
 		var seqIdx int
+		var ignoreSeq bool
+		var re *regexp.Regexp
 
 		for _, file := range genomes {
 			if opt.Verbose {
@@ -153,14 +172,28 @@ Attention:
 					checkError(errors.Wrap(err, file))
 					break
 				}
+
+				if filterNames {
+					ignoreSeq = false
+					for _, re = range reSeqNames {
+						if re.Match(record.Name) {
+							ignoreSeq = true
+							break
+						}
+					}
+					if ignoreSeq {
+						continue
+					}
+				}
+
 				// using ntHash
 				if hashed {
-					iter, err = unikmer.NewHashIterator(record.Seq, k, true, circular)
+					iter, err = sketches.NewHashIterator(record.Seq, k, true, circular)
 				} else {
-					iter, err = unikmer.NewKmerIterator(record.Seq, k, true, circular)
+					iter, err = sketches.NewKmerIterator(record.Seq, k, true, circular)
 				}
 				if err != nil {
-					if err == unikmer.ErrShortSeq {
+					if err == sketches.ErrShortSeq {
 						if opt.Verbose {
 							log.Infof("ignore short seq: %s", record.Name)
 						}
@@ -209,7 +242,7 @@ Attention:
 			w.Close()
 		}()
 
-		var reader *unikmer.Reader
+		var reader *unik.Reader
 		var locs [][2]int
 		var loc [2]int
 		var j int
@@ -226,7 +259,7 @@ Attention:
 				checkError(err)
 				defer r.Close()
 
-				reader, err = unikmer.NewReader(infh)
+				reader, err = unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				for {
@@ -257,6 +290,8 @@ Attention:
 func init() {
 	RootCmd.AddCommand(locateCmd)
 
+	locateCmd.Flags().StringSliceP("seq-name-filter", "B", []string{}, `list of regular expressions for filtering out sequences by header/name, case ignored`)
+
 	locateCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
 	locateCmd.Flags().StringSliceP("genome", "g", []string{}, "genomes in (gzipped) fasta file(s)")
 	locateCmd.Flags().BoolP("circular", "", false, `circular genome. type "unikmer locate -h" for details`)
diff -pruN 0.18.8-1/unikmer/cmd/merge.go 0.19.0-1/unikmer/cmd/merge.go
--- 0.18.8-1/unikmer/cmd/merge.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/merge.go	2022-04-25 12:44:35.000000000 +0000
@@ -29,9 +29,10 @@ import (
 	"regexp"
 
 	"github.com/pkg/errors"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
 	"github.com/shenwei356/util/pathutil"
 
-	"github.com/shenwei356/unikmer"
 	"github.com/spf13/cobra"
 )
 
@@ -137,13 +138,13 @@ Tips:
 		}
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var k int = -1
 		var canonical bool
 		var hashed bool
 		var hasTaxid bool
 		var mode uint32
-		var taxondb *unikmer.Taxonomy
+		var taxondb *taxdump.Taxonomy
 
 		_files := make([]string, 0, len(files))
 		for _, file := range files {
@@ -157,7 +158,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if !reader.IsSorted() {
@@ -172,15 +173,15 @@ Tips:
 					hasTaxid = !opt.IgnoreTaxid && reader.HasTaxidInfo()
 
 					if canonical {
-						mode |= unikmer.UnikCanonical
+						mode |= unik.UnikCanonical
 					}
 					if hasTaxid {
-						mode |= unikmer.UnikIncludeTaxID
+						mode |= unik.UnikIncludeTaxID
 					}
 					if hashed {
-						mode |= unikmer.UnikHashed
+						mode |= unik.UnikHashed
 					}
-					mode |= unikmer.UnikSorted
+					mode |= unik.UnikSorted
 
 					if hasTaxid {
 						if opt.Verbose {
@@ -343,7 +344,7 @@ func init() {
 	mergeCmd.Flags().StringP("pattern", "p", `^chunk_\d+\.unik$`, `chunk file pattern (regular expression)`)
 
 	mergeCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
-	mergeCmd.Flags().BoolP("unique", "u", false, `remove duplicated k-mers`)
+	mergeCmd.Flags().BoolP("unique", "u", false, `remove duplicate k-mers`)
 	mergeCmd.Flags().BoolP("repeated", "d", false, `only print duplicate k-mers`)
 
 	mergeCmd.Flags().IntP("max-open-files", "M", 400, `max number of open files`)
diff -pruN 0.18.8-1/unikmer/cmd/num.go 0.19.0-1/unikmer/cmd/num.go
--- 0.18.8-1/unikmer/cmd/num.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/num.go	2022-04-25 12:44:35.000000000 +0000
@@ -29,7 +29,8 @@ import (
 	"strings"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -78,7 +79,7 @@ Attention:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader *unikmer.Reader
+		var reader *unik.Reader
 
 		for _, file := range files {
 			func() {
@@ -86,7 +87,7 @@ Attention:
 				checkError(err)
 				defer r.Close()
 
-				reader, err = unikmer.NewReader(infh)
+				reader, err = unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if reader.Number < 0 && force {
diff -pruN 0.18.8-1/unikmer/cmd/rfilter.go 0.19.0-1/unikmer/cmd/rfilter.go
--- 0.18.8-1/unikmer/cmd/rfilter.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/rfilter.go	2022-04-25 12:44:35.000000000 +0000
@@ -30,7 +30,9 @@ import (
 	"strings"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
 	"github.com/shenwei356/util/pathutil"
 	"github.com/shenwei356/util/stringutil"
 	"github.com/spf13/cobra"
@@ -46,7 +48,7 @@ var rfilterCmd = &cobra.Command{
 Attentions:
   1. Flag -L/--lower-than and -H/--higher-than are exclusive, and can be
      used along with -E/--equal-to which values can be different.
-  2. A list of pre-ordered ranks is in ~/.taxonkit/ranks.txt, you can use
+  2. A list of pre-ordered ranks is in ~/.unikmer/ranks.txt, you can use
      your list by -r/--rank-file, the format specification is below.
   3. All ranks in taxonomy database should be defined in rank file.
   4. Ranks can be removed with black list via -B/--black-list.
@@ -110,6 +112,9 @@ Rank file:
 
 		if saveNorank {
 			discardNoRank = true
+			if !discardNoRank {
+				log.Infof("flag -N/--discard-noranks is switched on when using -n/--save-predictable-norank")
+			}
 
 			if lower == "" {
 				checkError(fmt.Errorf("flag -n/--save-predictable-norank only works along with -L/--lower-than"))
@@ -197,13 +202,16 @@ Rank file:
 
 		if opt.Verbose {
 			if discardNoRank {
-				log.Debugf("ranks without order will be discarded: %s", strings.Join(noRanksList, ", "))
+				log.Infof("ranks without order will be discarded: %s", strings.Join(noRanksList, ", "))
+			}
+			if saveNorank {
+				log.Infof("but some predictable 'no rank' will be saved with -n/--save-predictable-norank)")
 			}
 			if discardRoot {
-				log.Debugf("root rank without order will be discarded")
+				log.Infof("root rank without order will be discarded")
 			}
 			if len(blackListRanks) > 0 {
-				log.Debugf("ranks in black list will be discarded: %s", strings.Join(blackListRanks, ", "))
+				log.Infof("ranks in black list will be discarded: %s", strings.Join(blackListRanks, ", "))
 			}
 		}
 
@@ -223,11 +231,11 @@ Rank file:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -247,7 +255,7 @@ Rank file:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				hasTaxid = !opt.IgnoreTaxid && reader.HasTaxidInfo()
@@ -259,8 +267,8 @@ Rank file:
 					}
 
 					mode := reader.Flag
-					mode |= unikmer.UnikIncludeTaxID
-					writer, err = unikmer.NewWriter(outfh, k, mode)
+					mode |= unik.UnikIncludeTaxID
+					writer, err = unik.NewWriter(outfh, k, mode)
 					checkError(errors.Wrap(err, outFile))
 					writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 				} else {
@@ -334,7 +342,7 @@ func init() {
 }
 
 type rankFilter struct {
-	taxondb *unikmer.Taxonomy
+	taxondb *taxdump.Taxonomy
 
 	dbRanks   map[string]interface{}
 	rankOrder map[string]int
@@ -361,7 +369,7 @@ type rankFilter struct {
 	cache map[uint32]bool
 }
 
-func newRankFilter(taxondb *unikmer.Taxonomy, rankOrder map[string]int, noRanks map[string]interface{},
+func newRankFilter(taxondb *taxdump.Taxonomy, rankOrder map[string]int, noRanks map[string]interface{},
 	lower string, higher string, equals []string, blackList []string, discardNorank bool, saveKnownNoRank bool) (*rankFilter, error) {
 
 	if lower != "" && higher != "" {
diff -pruN 0.18.8-1/unikmer/cmd/root.go 0.19.0-1/unikmer/cmd/root.go
--- 0.18.8-1/unikmer/cmd/root.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/root.go	2022-04-25 12:44:35.000000000 +0000
@@ -33,8 +33,8 @@ import (
 // RootCmd represents the base command when called without any subcommands
 var RootCmd = &cobra.Command{
 	Use:   "unikmer",
-	Short: "Unique-Kmer Toolkit",
-	Long: fmt.Sprintf(`unikmer - Unique-Kmer Toolkit
+	Short: "Toolkit for k-mer with taxonomic information",
+	Long: fmt.Sprintf(`unikmer - Toolkit for k-mer with taxonomic information
 
 unikmer is a toolkit for nucleic acid k-mer analysis, providing functions
 including set operation on k-mers optional with TaxIds but without count
@@ -52,7 +52,7 @@ Version: v%s
 
 Author: Wei Shen <shenwei356@gmail.com>
 
-Documents  : https://shenwei356.github.io/unikmer
+Documents  : https://bioinf.shenwei.me/unikmer
 Source code: https://github.com/shenwei356/unikmer
 
 Dataset (optional):
@@ -64,11 +64,12 @@ Dataset (optional):
   or some other directory, and later you can refer to using flag
   --data-dir or environment variable UNIKMER_DB.
 
-  For GTDB, use https://github.com/nick-youngblut/gtdb_to_taxdump 
-  for taxonomy conversion.
+  For GTDB, use 'taxonkit create-taxdump' to create NCBI-style
+  taxonomy dump files, or download from:
+    https://github.com/shenwei356/gtdb-taxonomy
 
   Note that TaxIds are represented using uint32 and stored in 4 or
-  less bytes, all TaxIds should be in range of [1, %d]
+  less bytes, all TaxIds should be in the range of [1, %d]
 
 `, VERSION, maxUint32),
 }
@@ -90,11 +91,11 @@ func init() {
 	checkError(err)
 
 	defaultThreads := runtime.NumCPU()
-	if defaultThreads > 2 {
-		defaultThreads = 2
+	if defaultThreads > 4 {
+		defaultThreads = 4
 	}
 
-	RootCmd.PersistentFlags().IntP("threads", "j", defaultThreads, "number of CPUs to use. (default value: 1 for single-CPU PC, 2 for others)")
+	RootCmd.PersistentFlags().IntP("threads", "j", defaultThreads, "number of CPUs to use")
 	RootCmd.PersistentFlags().BoolP("verbose", "", false, "print verbose information")
 	RootCmd.PersistentFlags().BoolP("no-compress", "C", false, "do not compress binary file (not recommended)")
 	RootCmd.PersistentFlags().IntP("compression-level", "", flate.DefaultCompression, "compression level")
@@ -105,7 +106,39 @@ func init() {
 	RootCmd.PersistentFlags().BoolP("ignore-taxid", "I", false, "ignore taxonomy information")
 	RootCmd.PersistentFlags().StringP("data-dir", "", defaultDataDir, "directory containing NCBI Taxonomy files, including nodes.dmp, names.dmp, merged.dmp and delnodes.dmp")
 
-	RootCmd.PersistentFlags().BoolP("nocheck-file", "", false, "do not check binary file, when using process substitution/named pipe")
+	RootCmd.PersistentFlags().BoolP("nocheck-file", "", false, "do not check binary file, when using process substitution or named pipe")
+
+	RootCmd.CompletionOptions.DisableDefaultCmd = true
+	RootCmd.SetHelpCommand(&cobra.Command{Hidden: true})
+
+	RootCmd.SetUsageTemplate(usageTemplate(""))
 }
 
 const helpSort = "sort k-mers, this significantly reduce file size for k<=25. This flag overides global flag -c/--compact"
+
+func usageTemplate(s string) string {
+	return fmt.Sprintf(`Usage:{{if .Runnable}}
+  {{.UseLine}}{{end}}{{if .HasAvailableSubCommands}}
+  {{.CommandPath}} [command]{{end}} %s{{if gt (len .Aliases) 0}}
+
+Aliases:
+  {{.NameAndAliases}}{{end}}{{if .HasExample}}
+
+Examples:
+{{.Example}}{{end}}{{if .HasAvailableSubCommands}}
+
+Available Commands:{{range .Commands}}{{if (or .IsAvailableCommand (eq .Name "help"))}}
+  {{rpad .Name .NamePadding }} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableLocalFlags}}
+
+Flags:
+{{.LocalFlags.FlagUsagesWrapped 110 | trimTrailingWhitespaces}}{{end}}{{if .HasAvailableInheritedFlags}}
+
+Global Flags:
+{{.InheritedFlags.FlagUsagesWrapped 110 | trimTrailingWhitespaces}}{{end}}{{if .HasHelpSubCommands}}
+
+Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}}
+  {{rpad .CommandPath .CommandPathPadding}} {{.Short}}{{end}}{{end}}{{end}}{{if .HasAvailableSubCommands}}
+
+Use "{{.CommandPath}} [command] --help" for more information about a command.{{end}}
+`, s)
+}
diff -pruN 0.18.8-1/unikmer/cmd/sample.go 0.19.0-1/unikmer/cmd/sample.go
--- 0.18.8-1/unikmer/cmd/sample.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/sample.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,8 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -80,11 +81,11 @@ Attentions:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -103,14 +104,19 @@ Attentions:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
 					reader0 = reader
 					k = reader.K
 					hasTaxid = !opt.IgnoreTaxid && reader.HasTaxidInfo()
-					writer, err = unikmer.NewWriter(outfh, k, reader.Flag)
+
+					mode := reader.Flag
+					if hasTaxid {
+						mode |= unik.UnikIncludeTaxID // for multiple input files
+					}
+					writer, err = unik.NewWriter(outfh, k, mode)
 					checkError(errors.Wrap(err, outFile))
 					writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 				} else {
diff -pruN 0.18.8-1/unikmer/cmd/sort.go 0.19.0-1/unikmer/cmd/sort.go
--- 0.18.8-1/unikmer/cmd/sort.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/sort.go	2022-04-25 12:44:35.000000000 +0000
@@ -30,7 +30,9 @@ import (
 	"sync"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
 	"github.com/shenwei356/util/pathutil"
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts"
@@ -101,8 +103,8 @@ Tips:
 		checkFileSuffix(opt, extDataFile, files...)
 
 		var m []uint64
-		var taxondb *unikmer.Taxonomy
-		var mt []unikmer.CodeTaxid
+		var taxondb *taxdump.Taxonomy
+		var mt []CodeTaxid
 
 		outFile := outFile0
 		if !isStdout(outFile) {
@@ -134,11 +136,11 @@ Tips:
 			checkError(os.MkdirAll(tmpDir, 0777))
 		}
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -177,7 +179,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -191,7 +193,7 @@ Tips:
 						if opt.Verbose {
 							log.Infof("taxids found in file: %s", file)
 						}
-						mt = make([]unikmer.CodeTaxid, 0, listInitSize)
+						mt = make([]CodeTaxid, 0, listInitSize)
 						if unique || repeated {
 							taxondb = loadTaxonomy(opt, false)
 						}
@@ -200,15 +202,15 @@ Tips:
 					}
 
 					if canonical {
-						mode |= unikmer.UnikCanonical
+						mode |= unik.UnikCanonical
 					}
 					if hasTaxid {
-						mode |= unikmer.UnikIncludeTaxID
+						mode |= unik.UnikIncludeTaxID
 					}
 					if hashed {
-						mode |= unikmer.UnikHashed
+						mode |= unik.UnikHashed
 					}
-					mode |= unikmer.UnikSorted
+					mode |= unik.UnikSorted
 				} else {
 					checkCompatibility(reader0, reader, file)
 					if !opt.IgnoreTaxid && reader.HasTaxidInfo() != hasTaxid {
@@ -230,7 +232,7 @@ Tips:
 					}
 
 					if hasTaxid {
-						mt = append(mt, unikmer.CodeTaxid{Code: code, Taxid: taxid})
+						mt = append(mt, CodeTaxid{Code: code, Taxid: taxid})
 					} else {
 						m = append(m, code)
 					}
@@ -251,7 +253,7 @@ Tips:
 
 						wg.Add(1)
 						tokens <- 1
-						go func(m []uint64, mt []unikmer.CodeTaxid, iTmpFile int, outFile string) {
+						go func(m []uint64, mt []CodeTaxid, iTmpFile int, outFile string) {
 							defer func() {
 								wg.Done()
 								<-tokens
@@ -261,8 +263,8 @@ Tips:
 								if opt.Verbose {
 									log.Infof("[chunk %d] sorting %d k-mers", iTmpFile, len(mt))
 								}
-								// sort.Sort(unikmer.CodeTaxidSlice(mt))
-								sorts.Quicksort(unikmer.CodeTaxidSlice(mt))
+								// sort.Sort(CodeTaxidSlice(mt))
+								sorts.Quicksort(CodeTaxidSlice(mt))
 							} else {
 								if opt.Verbose {
 									log.Infof("[chunk %d] sorting %d k-mers", iTmpFile, len(m))
@@ -287,7 +289,7 @@ Tips:
 						}(m, mt, iTmpFile, outFile1)
 
 						if hasTaxid {
-							mt = make([]unikmer.CodeTaxid, 0, listInitSize)
+							mt = make([]CodeTaxid, 0, listInitSize)
 						} else {
 							m = make([]uint64, 0, listInitSize)
 						}
@@ -314,7 +316,7 @@ Tips:
 
 				wg.Add(1)
 				tokens <- 1
-				go func(m []uint64, mt []unikmer.CodeTaxid, iTmpFile int, outFile string) {
+				go func(m []uint64, mt []CodeTaxid, iTmpFile int, outFile string) {
 					defer func() {
 						wg.Done()
 						<-tokens
@@ -324,8 +326,8 @@ Tips:
 						if opt.Verbose {
 							log.Infof("[chunk %d] sorting %d k-mers", iTmpFile, len(mt))
 						}
-						// sort.Sort(unikmer.CodeTaxidSlice(mt))
-						sorts.Quicksort(unikmer.CodeTaxidSlice(mt))
+						// sort.Sort(CodeTaxidSlice(mt))
+						sorts.Quicksort(CodeTaxidSlice(mt))
 					} else {
 						if opt.Verbose {
 							log.Infof("[chunk %d] sorting %d k-mers", iTmpFile, len(m))
@@ -450,8 +452,8 @@ Tips:
 			if opt.Verbose {
 				log.Infof("sorting %d k-mers", len(mt))
 			}
-			// sort.Sort(unikmer.CodeTaxidSlice(mt))
-			sorts.Quicksort(unikmer.CodeTaxidSlice(mt))
+			// sort.Sort(CodeTaxidSlice(mt))
+			sorts.Quicksort(CodeTaxidSlice(mt))
 		} else {
 			if opt.Verbose {
 				log.Infof("sorting %d k-mers", len(m))
@@ -472,7 +474,7 @@ Tips:
 			}
 			w.Close()
 		}()
-		writer, err = unikmer.NewWriter(outfh, k, mode)
+		writer, err = unik.NewWriter(outfh, k, mode)
 		checkError(errors.Wrap(err, outFile))
 		writer.SetMaxTaxid(opt.MaxTaxid) // follow taxondb
 
@@ -580,7 +582,7 @@ func init() {
 	RootCmd.AddCommand(sortCmd)
 
 	sortCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
-	sortCmd.Flags().BoolP("unique", "u", false, `remove duplicated k-mers`)
+	sortCmd.Flags().BoolP("unique", "u", false, `remove duplicate k-mers`)
 	sortCmd.Flags().BoolP("repeated", "d", false, `only print duplicate k-mers`)
 	sortCmd.Flags().StringP("chunk-size", "m", "", `split input into chunks of N k-mers, supports K/M/G suffix, type "unikmer sort -h" for detail`)
 	sortCmd.Flags().StringP("tmp-dir", "t", "./", `directory for intermediate files`)
diff -pruN 0.18.8-1/unikmer/cmd/split.go 0.19.0-1/unikmer/cmd/split.go
--- 0.18.8-1/unikmer/cmd/split.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/split.go	2022-04-25 12:44:35.000000000 +0000
@@ -30,11 +30,12 @@ import (
 	"sync"
 
 	"github.com/pkg/errors"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
 	"github.com/shenwei356/util/pathutil"
 	"github.com/twotwotwo/sorts"
 	"github.com/twotwotwo/sorts/sortutil"
 
-	"github.com/shenwei356/unikmer"
 	"github.com/spf13/cobra"
 )
 
@@ -91,8 +92,8 @@ Tips:
 		checkFileSuffix(opt, extDataFile, files...)
 
 		var m []uint64
-		var taxondb *unikmer.Taxonomy
-		var mt []unikmer.CodeTaxid
+		var taxondb *taxdump.Taxonomy
+		var mt []CodeTaxid
 
 		if outDir == "" {
 			if isStdin(files[0]) {
@@ -123,7 +124,7 @@ Tips:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var k int = -1
@@ -155,7 +156,7 @@ Tips:
 
 		var outFile2 string
 		var n int
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 		var outfh *bufio.Writer
 		var gw io.WriteCloser
 		var w *os.File
@@ -171,7 +172,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -189,7 +190,7 @@ Tips:
 							if opt.Verbose {
 								log.Infof("taxids found in file: %s", file)
 							}
-							mt = make([]unikmer.CodeTaxid, 0, listInitSize)
+							mt = make([]CodeTaxid, 0, listInitSize)
 							taxondb = loadTaxonomy(opt, false)
 						} else {
 							m = make([]uint64, 0, listInitSize)
@@ -199,15 +200,15 @@ Tips:
 					}
 
 					if canonical {
-						mode |= unikmer.UnikCanonical
+						mode |= unik.UnikCanonical
 					}
 					if hasTaxid {
-						mode |= unikmer.UnikIncludeTaxID
+						mode |= unik.UnikIncludeTaxID
 					}
 					if hashed {
-						mode |= unikmer.UnikHashed
+						mode |= unik.UnikHashed
 					}
-					mode |= unikmer.UnikSorted
+					mode |= unik.UnikSorted
 
 					if doNotNeedSorting {
 						iTmpFile++
@@ -215,7 +216,7 @@ Tips:
 						outfh, gw, w, err = outStream(outFile2, opt.Compress, opt.CompressionLevel)
 						checkError(err)
 
-						writer, err = unikmer.NewWriter(outfh, k, mode)
+						writer, err = unik.NewWriter(outfh, k, mode)
 						checkError(errors.Wrap(err, outFile2))
 						writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 						if opt.Verbose {
@@ -264,7 +265,7 @@ Tips:
 							outfh, gw, w, err = outStream(outFile2, opt.Compress, opt.CompressionLevel)
 							checkError(err)
 
-							writer, err = unikmer.NewWriter(outfh, k, mode)
+							writer, err = unik.NewWriter(outfh, k, mode)
 							checkError(errors.Wrap(err, outFile2))
 							writer.SetMaxTaxid(maxUint32N(reader.GetTaxidBytesLength())) // follow reader
 
@@ -279,7 +280,7 @@ Tips:
 					}
 
 					if hasTaxid {
-						mt = append(mt, unikmer.CodeTaxid{Code: code, Taxid: taxid})
+						mt = append(mt, CodeTaxid{Code: code, Taxid: taxid})
 					} else {
 						m = append(m, code)
 					}
@@ -290,7 +291,7 @@ Tips:
 
 						wg.Add(1)
 						tokens <- 1
-						go func(m []uint64, mt []unikmer.CodeTaxid, iTmpFile int, outFile string) {
+						go func(m []uint64, mt []CodeTaxid, iTmpFile int, outFile string) {
 							defer func() {
 								wg.Done()
 								<-tokens
@@ -300,8 +301,8 @@ Tips:
 								if opt.Verbose {
 									log.Infof("[chunk %d] sorting %d k-mers", iTmpFile, len(mt))
 								}
-								// sort.Sort(unikmer.CodeTaxidSlice(mt))
-								sorts.Quicksort(unikmer.CodeTaxidSlice(mt))
+								// sort.Sort(CodeTaxidSlice(mt))
+								sorts.Quicksort(CodeTaxidSlice(mt))
 							} else {
 								if opt.Verbose {
 									log.Infof("[chunk %d] sorting %d k-mers", iTmpFile, len(m))
@@ -323,7 +324,7 @@ Tips:
 						}(m, mt, iTmpFile, outFile1)
 
 						if hasTaxid {
-							mt = make([]unikmer.CodeTaxid, 0, listInitSize)
+							mt = make([]CodeTaxid, 0, listInitSize)
 						} else {
 							m = make([]uint64, 0, listInitSize)
 						}
@@ -369,7 +370,7 @@ Tips:
 
 			wg.Add(1)
 			tokens <- 1
-			go func(m []uint64, mt []unikmer.CodeTaxid, iTmpFile int, outFile string) {
+			go func(m []uint64, mt []CodeTaxid, iTmpFile int, outFile string) {
 				defer func() {
 					wg.Done()
 					<-tokens
@@ -416,6 +417,6 @@ func init() {
 	splitCmd.Flags().StringP("out-dir", "O", "", `output directory`)
 	splitCmd.Flags().StringP("chunk-size", "m", "", `split input into chunks of N k-mers, supports K/M/G suffix, type "unikmer sort -h" for detail`)
 	splitCmd.Flags().BoolP("force", "", false, `overwrite output directory`)
-	splitCmd.Flags().BoolP("unique", "u", false, `split for further removing duplicated k-mers`)
+	splitCmd.Flags().BoolP("unique", "u", false, `split for further removing duplicate k-mers`)
 	splitCmd.Flags().BoolP("repeated", "d", false, `split for further printing duplicate k-mers`)
 }
diff -pruN 0.18.8-1/unikmer/cmd/stats.go 0.19.0-1/unikmer/cmd/stats.go
--- 0.18.8-1/unikmer/cmd/stats.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/stats.go	2022-04-25 12:44:35.000000000 +0000
@@ -32,7 +32,8 @@ import (
 
 	humanize "github.com/dustin/go-humanize"
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 	prettytable "github.com/tatsushid/go-prettytable"
 	"github.com/twotwotwo/sorts/sortutil"
@@ -337,7 +338,7 @@ Tips:
 
 				var infh *bufio.Reader
 				var r *os.File
-				var reader *unikmer.Reader
+				var reader *unik.Reader
 				var gzipped bool
 				var n uint64
 				var globalTaxid string
@@ -357,7 +358,7 @@ Tips:
 				}
 				defer r.Close()
 
-				reader, err = unikmer.NewReader(infh)
+				reader, err = unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 				if err != nil {
 					select {
diff -pruN 0.18.8-1/unikmer/cmd/tsplit.go 0.19.0-1/unikmer/cmd/tsplit.go
--- 0.18.8-1/unikmer/cmd/tsplit.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/tsplit.go	2022-04-25 12:44:35.000000000 +0000
@@ -31,9 +31,9 @@ import (
 	"sync"
 
 	"github.com/pkg/errors"
+	"github.com/shenwei356/unik/v5"
 	"github.com/shenwei356/util/pathutil"
 
-	"github.com/shenwei356/unikmer"
 	"github.com/spf13/cobra"
 )
 
@@ -113,7 +113,7 @@ Tips:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var codes *[]uint64
 		var code uint64
 		var taxid uint32
@@ -138,7 +138,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -151,12 +151,12 @@ Tips:
 						checkError(fmt.Errorf("input should be sorted: %s", file))
 					}
 					if canonical {
-						mode |= unikmer.UnikCanonical
+						mode |= unik.UnikCanonical
 					}
 					if hashed {
-						mode |= unikmer.UnikHashed
+						mode |= unik.UnikHashed
 					}
-					mode |= unikmer.UnikSorted
+					mode |= unik.UnikSorted
 					maxTaxid = maxUint32N(reader.GetTaxidBytesLength())
 				} else {
 					checkCompatibility(reader0, reader, file)
@@ -259,7 +259,7 @@ Tips:
 					_w.Close()
 				}()
 
-				_writer, err := unikmer.NewWriter(_outfh, k, mode)
+				_writer, err := unik.NewWriter(_outfh, k, mode)
 				checkError(errors.Wrap(err, _outFile))
 
 				_writer.Number = uint64(len(*codes))
diff -pruN 0.18.8-1/unikmer/cmd/union.go 0.19.0-1/unikmer/cmd/union.go
--- 0.18.8-1/unikmer/cmd/union.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/union.go	2022-04-25 12:44:35.000000000 +0000
@@ -27,7 +27,9 @@ import (
 	"os"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts/sortutil"
 )
@@ -70,7 +72,7 @@ Tips:
 		sortKmers := getFlagBool(cmd, "sort")
 
 		var m map[uint64]struct{}
-		var taxondb *unikmer.Taxonomy
+		var taxondb *taxdump.Taxonomy
 		var mt map[uint64]uint32
 
 		if !isStdout(outFile) {
@@ -86,7 +88,7 @@ Tips:
 			w.Close()
 		}()
 
-		var writer *unikmer.Writer
+		var writer *unik.Writer
 
 		var infh *bufio.Reader
 		var r *os.File
@@ -108,7 +110,7 @@ Tips:
 			return
 		}
 
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var code uint64
 		var taxid uint32
 		var lca uint32
@@ -130,7 +132,7 @@ Tips:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -152,20 +154,20 @@ Tips:
 					if !hasTaxid && !sortKmers {
 						var mode uint32
 						if sortKmers {
-							mode |= unikmer.UnikSorted
+							mode |= unik.UnikSorted
 						} else if opt.Compact && !hashed {
-							mode |= unikmer.UnikCompact
+							mode |= unik.UnikCompact
 						}
 						if canonical {
-							mode |= unikmer.UnikCanonical
+							mode |= unik.UnikCanonical
 						}
 						if hasTaxid {
-							mode |= unikmer.UnikIncludeTaxID
+							mode |= unik.UnikIncludeTaxID
 						}
 						if hashed {
-							mode |= unikmer.UnikHashed
+							mode |= unik.UnikHashed
 						}
-						writer, err = unikmer.NewWriter(outfh, k, mode)
+						writer, err = unik.NewWriter(outfh, k, mode)
 						checkError(errors.Wrap(err, outFile))
 						writer.SetMaxTaxid(opt.MaxTaxid)
 					}
@@ -217,20 +219,20 @@ Tips:
 		if sortKmers || hasTaxid {
 			var mode uint32
 			if sortKmers {
-				mode |= unikmer.UnikSorted
+				mode |= unik.UnikSorted
 			} else if opt.Compact && !hashed {
-				mode |= unikmer.UnikCompact
+				mode |= unik.UnikCompact
 			}
 			if canonical {
-				mode |= unikmer.UnikCanonical
+				mode |= unik.UnikCanonical
 			}
 			if hasTaxid {
-				mode |= unikmer.UnikIncludeTaxID
+				mode |= unik.UnikIncludeTaxID
 			}
 			if hashed {
-				mode |= unikmer.UnikHashed
+				mode |= unik.UnikHashed
 			}
-			writer, err = unikmer.NewWriter(outfh, k, mode)
+			writer, err = unik.NewWriter(outfh, k, mode)
 			checkError(err)
 			writer.SetMaxTaxid(opt.MaxTaxid)
 
diff -pruN 0.18.8-1/unikmer/cmd/uniqs.go 0.19.0-1/unikmer/cmd/uniqs.go
--- 0.18.8-1/unikmer/cmd/uniqs.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/uniqs.go	2022-04-25 12:44:35.000000000 +0000
@@ -25,12 +25,15 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"regexp"
 	"strings"
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/bio/seq"
 	"github.com/shenwei356/bio/seqio/fastx"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/sketches"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -53,6 +56,20 @@ Attention:
 
 		var err error
 
+		reSeqNameStrs := getFlagStringSlice(cmd, "seq-name-filter")
+		reSeqNames := make([]*regexp.Regexp, 0, len(reSeqNameStrs))
+		for _, kw := range reSeqNameStrs {
+			if !reIgnoreCase.MatchString(kw) {
+				kw = reIgnoreCaseStr + kw
+			}
+			re, err := regexp.Compile(kw)
+			if err != nil {
+				checkError(errors.Wrapf(err, "failed to parse regular expression for matching sequence header: %s", kw))
+			}
+			reSeqNames = append(reSeqNames, re)
+		}
+		filterNames := len(reSeqNames) > 0
+
 		if opt.Verbose {
 			log.Info("checking input files ...")
 		}
@@ -102,7 +119,7 @@ Attention:
 		var canonical bool
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var hashed bool
 		var code uint64
 		var nfiles = len(files)
@@ -115,7 +132,7 @@ Attention:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -154,10 +171,12 @@ Attention:
 
 		var record *fastx.Record
 		var fastxReader *fastx.Reader
-		var iter *unikmer.Iterator
+		var iter *sketches.Iterator
 		var i int
 		var ok bool
 		var multipleMapped bool
+		var ignoreSeq bool
+		var re *regexp.Regexp
 
 		if !mMapped {
 			m2 = make(map[int]map[uint64]bool, 8)
@@ -180,13 +199,26 @@ Attention:
 						break
 					}
 
+					if filterNames {
+						ignoreSeq = false
+						for _, re = range reSeqNames {
+							if re.Match(record.Name) {
+								ignoreSeq = true
+								break
+							}
+						}
+						if ignoreSeq {
+							continue
+						}
+					}
+
 					if hashed {
-						iter, err = unikmer.NewHashIterator(record.Seq, k, true, circular)
+						iter, err = sketches.NewHashIterator(record.Seq, k, true, circular)
 					} else {
-						iter, err = unikmer.NewKmerIterator(record.Seq, k, true, circular)
+						iter, err = sketches.NewKmerIterator(record.Seq, k, true, circular)
 					}
 					if err != nil {
-						if err == unikmer.ErrShortSeq {
+						if err == sketches.ErrShortSeq {
 							if opt.Verbose {
 								log.Infof("ignore short seq in file '%s': %s", genomeFile, record.Name)
 							}
@@ -276,6 +308,19 @@ Attention:
 					break
 				}
 
+				if filterNames {
+					ignoreSeq = false
+					for _, re = range reSeqNames {
+						if re.Match(record.Name) {
+							ignoreSeq = true
+							break
+						}
+					}
+					if ignoreSeq {
+						continue
+					}
+				}
+
 				length0 = len(record.Seq.Seq)
 
 				if circular { // concat two copies of sequence
@@ -292,9 +337,9 @@ Attention:
 				nonUniqsNum = 0
 
 				if hashed {
-					iter, err = unikmer.NewHashIterator(record.Seq, k, true, false)
+					iter, err = sketches.NewHashIterator(record.Seq, k, true, false)
 				} else {
-					iter, err = unikmer.NewKmerIterator(record.Seq, k, true, false)
+					iter, err = sketches.NewKmerIterator(record.Seq, k, true, false)
 				}
 				checkError(errors.Wrapf(err, "seq: %s", record.Name))
 
@@ -435,6 +480,8 @@ Attention:
 func init() {
 	RootCmd.AddCommand(uniqsCmd)
 
+	uniqsCmd.Flags().StringSliceP("seq-name-filter", "B", []string{}, `list of regular expressions for filtering out sequences by header/name, case ignored`)
+
 	uniqsCmd.Flags().StringP("out-prefix", "o", "-", `out file prefix ("-" for stdout)`)
 	uniqsCmd.Flags().StringSliceP("genome", "g", []string{}, "genomes in (gzipped) fasta file(s)")
 	uniqsCmd.Flags().IntP("min-len", "m", 200, "minimum length of subsequence")
diff -pruN 0.18.8-1/unikmer/cmd/util-binary-file.go 0.19.0-1/unikmer/cmd/util-binary-file.go
--- 0.18.8-1/unikmer/cmd/util-binary-file.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/util-binary-file.go	2022-04-25 12:44:35.000000000 +0000
@@ -23,12 +23,12 @@ package cmd
 import (
 	"fmt"
 
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/unik/v5"
 )
 
 const extDataFile = ".unik"
 
-func checkCompatibility(reader0 *unikmer.Reader, reader *unikmer.Reader, file string) {
+func checkCompatibility(reader0 *unik.Reader, reader *unik.Reader, file string) {
 	if reader0.K != reader.K {
 		checkError(fmt.Errorf(`k-mer length not consistent (%d != %d), please check with "unikmer stats": %s`, reader0.K, reader.K, file))
 	}
diff -pruN 0.18.8-1/unikmer/cmd/util.go 0.19.0-1/unikmer/cmd/util.go
--- 0.18.8-1/unikmer/cmd/util.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/util.go	2022-04-25 12:44:35.000000000 +0000
@@ -32,7 +32,9 @@ import (
 
 	"github.com/pkg/errors"
 	"github.com/shenwei356/bio/seqio/fastx"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/sketches"
+	"github.com/shenwei356/bio/taxdump"
+
 	"github.com/shenwei356/util/pathutil"
 	"github.com/spf13/cobra"
 	"github.com/twotwotwo/sorts"
@@ -108,18 +110,18 @@ func checkDataDir(opt *Options) {
 	}
 }
 
-func loadTaxonomy(opt *Options, withRank bool) *unikmer.Taxonomy {
+func loadTaxonomy(opt *Options, withRank bool) *taxdump.Taxonomy {
 	checkDataDir(opt)
 
 	if opt.Verbose {
 		log.Infof("loading Taxonomy from: %s", opt.DataDir)
 	}
-	var t *unikmer.Taxonomy
+	var t *taxdump.Taxonomy
 	var err error
 	if withRank {
-		t, err = unikmer.NewTaxonomyWithRankFromNCBI(filepath.Join(opt.DataDir, "nodes.dmp"))
+		t, err = taxdump.NewTaxonomyWithRankFromNCBI(filepath.Join(opt.DataDir, "nodes.dmp"))
 	} else {
-		t, err = unikmer.NewTaxonomyFromNCBI(filepath.Join(opt.DataDir, "nodes.dmp"))
+		t, err = taxdump.NewTaxonomyFromNCBI(filepath.Join(opt.DataDir, "nodes.dmp"))
 	}
 	if err != nil {
 		checkError(fmt.Errorf("err on loading Taxonomy nodes: %s", err))
@@ -230,7 +232,7 @@ func extendDegenerateSeq(s []byte) (dseq
 			}
 
 		} else {
-			return dseqs, unikmer.ErrIllegalBase
+			return dseqs, fmt.Errorf("invalid degenerate bases: %s", base)
 		}
 	}
 	return dseqs, nil
@@ -343,7 +345,7 @@ func loadHash2Loc(files []string, k int)
 	var err error
 	var fastxReader *fastx.Reader
 	var record *fastx.Record
-	var iter *unikmer.Iterator
+	var iter *sketches.Iterator
 	var code uint64
 	var ok bool
 	var seqIdx int
@@ -360,7 +362,7 @@ func loadHash2Loc(files []string, k int)
 				checkError(errors.Wrap(err, file))
 				break
 			}
-			iter, err = unikmer.NewHashIterator(record.Seq, k, true, true)
+			iter, err = sketches.NewHashIterator(record.Seq, k, true, true)
 			if err != nil {
 				checkError(errors.Wrapf(err, "seq: %s", record.Name))
 			}
diff -pruN 0.18.8-1/unikmer/cmd/util-sort.go 0.19.0-1/unikmer/cmd/util-sort.go
--- 0.18.8-1/unikmer/cmd/util-sort.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/util-sort.go	2022-04-25 12:44:35.000000000 +0000
@@ -28,7 +28,8 @@ import (
 	"path/filepath"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/bio/taxdump"
+	"github.com/shenwei356/unik/v5"
 )
 
 func dumpCodes2File(m []uint64, k int, mode uint32, outFile string, opt *Options, unique bool, repeated bool) int64 {
@@ -42,7 +43,7 @@ func dumpCodes2File(m []uint64, k int, m
 		w.Close()
 	}()
 
-	writer, err := unikmer.NewWriter(outfh, k, mode)
+	writer, err := unik.NewWriter(outfh, k, mode)
 	checkError(err)
 	writer.SetMaxTaxid(opt.MaxTaxid)
 
@@ -80,7 +81,7 @@ func dumpCodes2File(m []uint64, k int, m
 	return n
 }
 
-func dumpCodesTaxids2File(mt []unikmer.CodeTaxid, taxondb *unikmer.Taxonomy, k int, mode uint32, outFile string, opt *Options, unique bool, repeated bool) int64 {
+func dumpCodesTaxids2File(mt []CodeTaxid, taxondb *taxdump.Taxonomy, k int, mode uint32, outFile string, opt *Options, unique bool, repeated bool) int64 {
 	outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
 	checkError(err)
 	defer func() {
@@ -91,7 +92,7 @@ func dumpCodesTaxids2File(mt []unikmer.C
 		w.Close()
 	}()
 
-	writer, err := unikmer.NewWriter(outfh, k, mode)
+	writer, err := unik.NewWriter(outfh, k, mode)
 	checkError(err)
 	writer.SetMaxTaxid(opt.MaxTaxid)
 
@@ -192,7 +193,7 @@ func (h codeEntryHeap) Pop() interface{}
 	return x
 }
 
-func mergeChunksFile(opt *Options, taxondb *unikmer.Taxonomy, files []string, outFile string, k int, mode uint32, unique bool, repeated bool, finalRound bool) (int64, string) {
+func mergeChunksFile(opt *Options, taxondb *taxdump.Taxonomy, files []string, outFile string, k int, mode uint32, unique bool, repeated bool, finalRound bool) (int64, string) {
 	outfh, gw, w, err := outStream(outFile, opt.Compress, opt.CompressionLevel)
 	checkError(err)
 	defer func() {
@@ -203,26 +204,26 @@ func mergeChunksFile(opt *Options, taxon
 		w.Close()
 	}()
 
-	var writer *unikmer.Writer
-	hasTaxid := mode&unikmer.UnikIncludeTaxID > 0
+	var writer *unik.Writer
+	hasTaxid := mode&unik.UnikIncludeTaxID > 0
 	if hasTaxid && taxondb == nil {
 		checkError(fmt.Errorf("taxon information is need when UnikIncludeTaxID is one"))
 	}
 
-	writer, err = unikmer.NewWriter(outfh, k, mode)
+	writer, err = unik.NewWriter(outfh, k, mode)
 	checkError(err)
 	writer.SetMaxTaxid(opt.MaxTaxid)
 
-	readers := make(map[int]*unikmer.Reader, len(files))
+	readers := make(map[int]*unik.Reader, len(files))
 	fhs := make([]*os.File, len(files))
 
-	var reader *unikmer.Reader
+	var reader *unik.Reader
 	for i, file := range files {
 		infh, fh, _, err := inStream(file)
 		checkError(errors.Wrap(err, file))
 		fhs = append(fhs, fh)
 
-		reader, err := unikmer.NewReader(infh)
+		reader, err := unik.NewReader(infh)
 		checkError(errors.Wrap(err, file))
 		readers[i] = reader
 	}
diff -pruN 0.18.8-1/unikmer/cmd/version.go 0.19.0-1/unikmer/cmd/version.go
--- 0.18.8-1/unikmer/cmd/version.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/version.go	2022-04-25 12:44:35.000000000 +0000
@@ -29,7 +29,7 @@ import (
 )
 
 // VERSION is the version
-var VERSION = "0.17.2"
+var VERSION = "0.19.0"
 
 // versionCmd represents the version command
 var versionCmd = &cobra.Command{
diff -pruN 0.18.8-1/unikmer/cmd/view.go 0.19.0-1/unikmer/cmd/view.go
--- 0.18.8-1/unikmer/cmd/view.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/unikmer/cmd/view.go	2022-04-25 12:44:35.000000000 +0000
@@ -28,7 +28,9 @@ import (
 	"strings"
 
 	"github.com/pkg/errors"
-	"github.com/shenwei356/unikmer"
+	"github.com/shenwei356/kmers"
+	"github.com/shenwei356/unik/v5"
+
 	"github.com/spf13/cobra"
 )
 
@@ -88,7 +90,7 @@ Attentions:
 
 		var infh *bufio.Reader
 		var r *os.File
-		var reader0 *unikmer.Reader
+		var reader0 *unik.Reader
 		var canonical bool
 		var hashed bool
 
@@ -112,7 +114,7 @@ Attentions:
 				checkError(err)
 				defer r.Close()
 
-				reader, err := unikmer.NewReader(infh)
+				reader, err := unik.NewReader(infh)
 				checkError(errors.Wrap(err, file))
 
 				if k == -1 {
@@ -168,7 +170,7 @@ Attentions:
 					}
 
 					if !hashed {
-						kmer = unikmer.MustDecode(code, k)
+						kmer = kmers.MustDecode(code, k)
 					} else {
 						if providingGenomes {
 							if loc, ok = hash2loc[code]; ok {
@@ -184,24 +186,32 @@ Attentions:
 
 					if outFasta {
 						if showTaxid {
-							outfh.WriteString(fmt.Sprintf(">%d %d\n%s\n", code, taxid, kmer))
+							// outfh.WriteString(fmt.Sprintf(">%d %d\n%s\n", code, taxid, kmer))
+							fmt.Fprintf(outfh, ">%d %d\n%s\n", code, taxid, kmer)
 						} else {
-							outfh.WriteString(fmt.Sprintf(">%d\n%s\n", code, kmer))
+							// outfh.WriteString(fmt.Sprintf(">%d\n%s\n", code, kmer))
+							fmt.Fprintf(outfh, ">%d\n%s\n", code, kmer)
 						}
 					} else if outFastq {
 						if showTaxid {
-							outfh.WriteString(fmt.Sprintf("@%d %d\n%s\n+\n%s\n", code, taxid, kmer, quality))
+							// outfh.WriteString(fmt.Sprintf("@%d %d\n%s\n+\n%s\n", code, taxid, kmer, quality))
+							fmt.Fprintf(outfh, "@%d %d\n%s\n+\n%s\n", code, taxid, kmer, quality)
 						} else {
-							outfh.WriteString(fmt.Sprintf("@%d\n%s\n+\n%s\n", code, kmer, quality))
+							// outfh.WriteString(fmt.Sprintf("@%d\n%s\n+\n%s\n", code, kmer, quality))
+							fmt.Fprintf(outfh, "@%d\n%s\n+\n%s\n", code, kmer, quality)
 						}
 					} else if showTaxid {
-						outfh.WriteString(fmt.Sprintf("%s\t%d\n", kmer, taxid))
+						// outfh.WriteString(fmt.Sprintf("%s\t%d\n", kmer, taxid))
+						fmt.Fprintf(outfh, "%s\t%d\n", kmer, taxid)
 					} else if showTaxidOnly {
-						outfh.WriteString(fmt.Sprintf("%d\n", taxid))
+						// outfh.WriteString(fmt.Sprintf("%d\n", taxid))
+						fmt.Fprintf(outfh, "%d\n", taxid)
 					} else if showCodeOnly {
-						outfh.WriteString(fmt.Sprintf("%d\n", code))
+						// outfh.WriteString(fmt.Sprintf("%d\n", code))
+						fmt.Fprintf(outfh, "%d\n", code)
 					} else if showCode {
-						outfh.WriteString(fmt.Sprintf("%s\t%d\n", kmer, code))
+						// outfh.WriteString(fmt.Sprintf("%s\t%d\n", kmer, code))
+						fmt.Fprintf(outfh, "%s\t%d\n", kmer, code)
 					} else {
 						outfh.WriteString(string(kmer) + "\n")
 					}
diff -pruN 0.18.8-1/varint-GB.go 0.19.0-1/varint-GB.go
--- 0.18.8-1/varint-GB.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/varint-GB.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,152 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-var offsets = []uint8{56, 48, 40, 32, 24, 16, 8, 0}
-
-// PutUint64s endcodes two uint64s into 2-16 bytes, and returns control byte
-// and encoded byte length.
-func PutUint64s(buf []byte, v1, v2 uint64) (ctrl byte, n int) {
-	blen := byteLength(v1)
-	ctrl |= byte(blen - 1)
-	for _, offset := range offsets[8-blen:] {
-		buf[n] = byte((v1 >> offset) & 0xff)
-		n++
-	}
-
-	ctrl <<= 3
-	blen = byteLength(v2)
-	ctrl |= byte(blen - 1)
-	for _, offset := range offsets[8-blen:] {
-		buf[n] = byte((v2 >> offset) & 0xff)
-		n++
-	}
-	return
-}
-
-// Uint64s decode from encoded bytes
-func Uint64s(ctrl byte, buf []byte) (values [2]uint64, n int) {
-	blens := ctrlByte2ByteLengths[ctrl]
-	if len(buf) < int(blens[0]+blens[1]) {
-		return values, 0
-	}
-	for i := 0; i < 2; i++ {
-		for j := uint8(0); j < blens[i]; j++ {
-			values[i] <<= 8
-			values[i] |= uint64(buf[n])
-			n++
-		}
-	}
-
-	return
-}
-
-func byteLength(n uint64) uint8 {
-	if n < 256 {
-		return 1
-	}
-	if n < 65536 {
-		return 2
-	}
-	if n < 16777216 {
-		return 3
-	}
-	if n < 4294967296 {
-		return 4
-	}
-	if n < 1099511627776 {
-		return 5
-	}
-	if n < 281474976710656 {
-		return 6
-	}
-	if n < 72057594037927936 {
-		return 7
-	}
-	return 8
-}
-
-var ctrlByte2ByteLengths = [64][2]uint8{
-	{1, 1}, // 0, 0b000000
-	{1, 2},
-	{1, 3},
-	{1, 4},
-	{1, 5},
-	{1, 6},
-	{1, 7},
-	{1, 8},
-	{2, 1}, // 8, 0b001000
-	{2, 2},
-	{2, 3},
-	{2, 4},
-	{2, 5},
-	{2, 6},
-	{2, 7},
-	{2, 8},
-	{3, 1}, // 16, 0b010000
-	{3, 2},
-	{3, 3},
-	{3, 4},
-	{3, 5},
-	{3, 6},
-	{3, 7},
-	{3, 8},
-	{4, 1}, // 24, 0b011000
-	{4, 2},
-	{4, 3},
-	{4, 4},
-	{4, 5},
-	{4, 6},
-	{4, 7},
-	{4, 8},
-	{5, 1}, // 32, 0b100000
-	{5, 2},
-	{5, 3},
-	{5, 4},
-	{5, 5},
-	{5, 6},
-	{5, 7},
-	{5, 8},
-	{6, 1}, // 40, 0b101000
-	{6, 2},
-	{6, 3},
-	{6, 4},
-	{6, 5},
-	{6, 6},
-	{6, 7},
-	{6, 8},
-	{7, 1}, // 48, 0b110000
-	{7, 2},
-	{7, 3},
-	{7, 4},
-	{7, 5},
-	{7, 6},
-	{7, 7},
-	{7, 8},
-	{8, 1}, // 56, 0b111000
-	{8, 2},
-	{8, 3},
-	{8, 4},
-	{8, 5},
-	{8, 6},
-	{8, 7},
-	{8, 8},
-}
diff -pruN 0.18.8-1/varint-GB_test.go 0.19.0-1/varint-GB_test.go
--- 0.18.8-1/varint-GB_test.go	2021-09-17 02:53:25.000000000 +0000
+++ 0.19.0-1/varint-GB_test.go	1970-01-01 00:00:00.000000000 +0000
@@ -1,59 +0,0 @@
-// Copyright © 2018-2021 Wei Shen <shenwei356@gmail.com>
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-package unikmer
-
-import (
-	"math/rand"
-	"testing"
-)
-
-func TestStreamVByte64(t *testing.T) {
-	ntests := 10000
-	tests := make([][2]uint64, ntests)
-	var i int
-	for ; i < ntests/4; i++ {
-		tests[i] = [2]uint64{rand.Uint64(), rand.Uint64()}
-	}
-	for ; i < ntests/2; i++ {
-		tests[i] = [2]uint64{uint64(rand.Uint32()), uint64(rand.Uint32())}
-	}
-	for ; i < ntests*3/4; i++ {
-		tests[i] = [2]uint64{uint64(rand.Intn(65536)), uint64(rand.Intn(256))}
-	}
-	for ; i < ntests; i++ {
-		tests[i] = [2]uint64{uint64(rand.Intn(256)), uint64(rand.Intn(256))}
-	}
-
-	for i, test := range tests {
-		buf := make([]byte, 16)
-		ctrl, n := PutUint64s(buf, test[0], test[1])
-
-		result, n2 := Uint64s(ctrl, buf[0:n])
-		if n2 == 0 {
-			t.Errorf("#%d, wrong decoded number", i)
-		}
-
-		if result[0] != test[0] || result[1] != test[1] {
-			t.Errorf("#%d, wrong decoded result: %d, %d, answer: %d, %d", i, result[0], result[1], test[0], test[1])
-		}
-		// fmt.Printf("%d, %d => n=%d, buf=%v\n", test[0], test[1], n, buf[0:n])
-	}
-}
