76 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			76 lines
		
	
	
		
			2.2 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
| // The Computer Language Benchmarks Game
 | |
| // https://benchmarksgame-team.pages.debian.net/benchmarksgame/
 | |
| //
 | |
| // contributed by the Rust Project Developers
 | |
| // contributed by TeXitoi
 | |
| // contributed by BurntSushi
 | |
| 
 | |
| use std::io::{self, Read};
 | |
| 
 | |
/// Compiles a regular-expression pattern into a `::regex::Regex`.
///
/// The expansion unwraps the compilation result, so an invalid pattern
/// panics at the call site instead of being returned as an error.
macro_rules! regex {
    ($pattern:expr) => (
        ::regex::Regex::new($pattern).unwrap()
    );
}
 | |
| 
 | |
| fn main() {
 | |
|     let mut seq = String::with_capacity(50 * (1 << 20));
 | |
|     io::stdin().read_to_string(&mut seq).unwrap();
 | |
|     let ilen = seq.len();
 | |
| 
 | |
|     seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
 | |
|     let clen = seq.len();
 | |
| 
 | |
|     let variants = vec![
 | |
|         regex!("agggtaaa|tttaccct"),
 | |
|         regex!("[cgt]gggtaaa|tttaccc[acg]"),
 | |
|         regex!("a[act]ggtaaa|tttacc[agt]t"),
 | |
|         regex!("ag[act]gtaaa|tttac[agt]ct"),
 | |
|         regex!("agg[act]taaa|ttta[agt]cct"),
 | |
|         regex!("aggg[acg]aaa|ttt[cgt]ccct"),
 | |
|         regex!("agggt[cgt]aa|tt[acg]accct"),
 | |
|         regex!("agggta[cgt]a|t[acg]taccct"),
 | |
|         regex!("agggtaa[cgt]|[acg]ttaccct"),
 | |
|     ];
 | |
|     for re in variants {
 | |
|         println!("{} {}", re.to_string(), re.find_iter(&seq).count());
 | |
|     }
 | |
| 
 | |
|     let substs = vec![
 | |
|         (b'B', "(c|g|t)"),
 | |
|         (b'D', "(a|g|t)"),
 | |
|         (b'H', "(a|c|t)"),
 | |
|         (b'K', "(g|t)"),
 | |
|         (b'M', "(a|c)"),
 | |
|         (b'N', "(a|c|g|t)"),
 | |
|         (b'R', "(a|g)"),
 | |
|         (b'S', "(c|g)"),
 | |
|         (b'V', "(a|c|g)"),
 | |
|         (b'W', "(a|t)"),
 | |
|         (b'Y', "(c|t)"),
 | |
|     ]; // combined into one regex in `replace_all`
 | |
|     let seq = replace_all(&seq, substs);
 | |
| 
 | |
|     println!("\n{}\n{}\n{}", ilen, clen, seq.len());
 | |
| }
 | |
| 
 | |
| fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
 | |
|     let mut replacements = vec![""; 256];
 | |
|     let mut alternates = vec![];
 | |
|     for (re, replacement) in substs {
 | |
|         replacements[re as usize] = replacement;
 | |
|         alternates.push((re as char).to_string());
 | |
|     }
 | |
| 
 | |
|     let re = regex!(&alternates.join("|"));
 | |
|     let mut new = String::with_capacity(text.len());
 | |
|     let mut last_match = 0;
 | |
|     for m in re.find_iter(text) {
 | |
|         new.push_str(&text[last_match..m.start()]);
 | |
|         new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
 | |
|         last_match = m.end();
 | |
|     }
 | |
|     new.push_str(&text[last_match..]);
 | |
|     new
 | |
| }
 |