Skip to content

切片方法与安全

> 学习切片的常用方法(trim、split、windows 等),理解边界检查与 UTF-8 安全机制。

常用切片方法

字符串方法

rust
fn main() {
    // Demo input with leading and trailing spaces so that trimming is visible.
    let s = "  Hello, Rust World!  ".to_string();

    // Strip surrounding whitespace.
    let trimmed = s.trim();
    println!("去除空白:'{}'", trimmed);

    // Case conversion (each call allocates a new String).
    let upper = s.to_uppercase();
    let lower = s.to_lowercase();
    println!("大写:'{}'", upper);
    println!("小写:'{}'", lower);

    // Substring / prefix / suffix tests.
    let has_rust = s.contains("Rust");
    let leading_space = s.starts_with(' ');
    let trailing_space = s.ends_with(' ');
    println!("包含 Rust: {}", has_rust);
    println!("以空格开头:{}", leading_space);
    println!("以空格结尾:{}", trailing_space);

    // Searching: both calls yield an Option holding a byte offset.
    let rust_at = s.find("Rust");
    let last_o_at = s.rfind('o');
    println!("find 'Rust': {:?}", rust_at);
    println!("rfind 'o': {:?}", last_o_at);

    // Replacement produces a new String; `s` itself is untouched.
    let replaced = s.replace("Rust", "World");
    println!("替换:'{}'", replaced);

    // Split on runs of whitespace.
    let words = s.split_whitespace().collect::<Vec<&str>>();
    println!("单词:{:?}", words);

    // Count the lines in the string.
    let line_total = s.lines().count();
    println!("行数:{}", line_total);
}
▶ Run

切片迭代

rust
fn main() {
    let numbers = [1, 2, 3, 4, 5];

    // Plain iteration over references to the elements.
    println!("迭代:");
    numbers.iter().for_each(|value| print!("{} ", value));
    println!();

    // Iteration paired with each element's index.
    println!("带索引:");
    numbers
        .iter()
        .enumerate()
        .for_each(|(index, value)| print!("{}:{} ", index, value));
    println!();

    // Overlapping windows of three consecutive elements.
    println!("滑动窗口 (大小 3):");
    numbers
        .windows(3)
        .for_each(|triple| println!("{:?}", triple));

    // Non-overlapping chunks of two; the final chunk may be shorter.
    println!("分块 (大小 2):");
    numbers
        .chunks(2)
        .for_each(|pair| println!("{:?}", pair));
}
▶ Run

完整示例

示例 1:查找第一个单词

rust
/// Returns the first word of `s`.
///
/// The word is everything that precedes the first ASCII space. When `s`
/// contains no space at all, the whole string is returned. A string that
/// starts with a space therefore yields an empty slice.
fn first_word(s: &str) -> &str {
    match s.find(' ') {
        // `space_at` is the byte offset of an ASCII space, so it always
        // falls on a char boundary and the slice cannot panic.
        Some(space_at) => &s[..space_at],
        None => s,
    }
}

fn main() {
    let s1 = String::from("hello world");
    let s2 = "rust programming";
    let s3 = "single";

    println!("第一个单词:{}", first_word(&s1));  // hello
    println!("第一个单词:{}", first_word(s2));   // rust
    println!("第一个单词:{}", first_word(s3));   // single
}
▶ Run

示例 2:统计文本信息

rust
/// Summary statistics computed over a piece of text by `analyze_text`.
struct TextStats {
    /// Number of `char`s (Unicode scalar values) in the text.
    char_count: usize,
    /// Number of whitespace-separated words.
    word_count: usize,
    /// Number of lines as reported by `str::lines`.
    line_count: usize,
    /// Mean number of `char`s per word; `0.0` when the text has no words.
    avg_word_length: f64,
}

/// Computes character, word and line counts for `text`, plus the average
/// word length measured in `char`s.
///
/// Words are split on whitespace. For text without any word the average is
/// reported as `0.0` instead of dividing by zero.
fn analyze_text(text: &str) -> TextStats {
    // Single pass over the words, tracking how many there are and how many
    // chars they hold in total.
    let (word_count, letters) = text
        .split_whitespace()
        .fold((0usize, 0usize), |(words, letters), word| {
            (words + 1, letters + word.chars().count())
        });

    let avg_word_length = match word_count {
        0 => 0.0,
        n => letters as f64 / n as f64,
    };

    TextStats {
        char_count: text.chars().count(),
        word_count,
        line_count: text.lines().count(),
        avg_word_length,
    }
}

fn main() {
    let text = "Rust is a systems programming language.
It is designed for safety and performance.
Rust has no garbage collector.";

    let stats = analyze_text(text);

    println!("文本统计:");
    println!("  字符数:{}", stats.char_count);
    println!("  单词数:{}", stats.word_count);
    println!("  行数:{}", stats.line_count);
    println!("  平均单词长度:{:.1}", stats.avg_word_length);
}
▶ Run

示例 3:简单的 Run-Length 编码

rust
/// Simple run-length compression of a string: `aaabbcccc` → `a3b2c4`.
///
/// Each run of identical consecutive `char`s is written as the char followed
/// by the run length. The length is omitted for runs of exactly one, so
/// `abc` encodes to `abc`. Empty input yields an empty string.
fn run_length_encode(input: &str) -> String {
    let mut encoded = String::new();
    let mut remaining = input.chars().peekable();

    while let Some(symbol) = remaining.next() {
        // Consume every immediately following repetition of `symbol`.
        let mut run_length = 1usize;
        while remaining.next_if_eq(&symbol).is_some() {
            run_length += 1;
        }

        encoded.push(symbol);
        if run_length > 1 {
            encoded.push_str(&run_length.to_string());
        }
    }

    encoded
}

fn main() {
    // Sample inputs: mixed runs, no repetition, one long run, uniform pairs.
    let tests = ["aaabbcccc", "abcdef", "aaaaaa", "aabbccddee"];

    // Print each input next to its encoded form.
    tests
        .iter()
        .for_each(|sample| println!("{} → {}", sample, run_length_encode(sample)));
}
▶ Run

输出:

aaabbcccc → a3b2c4
abcdef → abcdef
aaaaaa → a6
aabbccddee → a2b2c2d2e2

安全特性

边界检查

rust
fn main() {
    let numbers = [1, 2, 3, 4, 5];

    // Slicing is bounds-checked; an invalid range panics.
    //
    // This would panic:
    //     let bad = &numbers[10..15];

    // Safe alternative: `get` yields `None` for an out-of-range request
    // instead of panicking.
    if let Some(slice) = numbers.get(10..) {
        println!("{:?}", slice);
    } else {
        println!("索引超出范围");
    }

    // Alternatively, check the length before slicing.
    if numbers.len() < 10 {
        println!("数组太短");
    } else {
        let slice = &numbers[5..10];
        println!("{:?}", slice);
    }
}
▶ Run

UTF-8 字符边界

rust
fn main() {
    let s = String::from("你好世界");

    // Wrong: slicing at an offset that is not a char boundary.
    //     &s[0..1];  // panics: each of these characters is 3 bytes, so 0..1 is not a valid boundary

    // Correct: walk the string char by char.
    s.chars().for_each(|c| println!("{}", c));

    // Correct: slice exactly on char boundaries (3 bytes per character here).
    let (first, second) = (&s[..3], &s[3..6]); // "你", "好"
    println!("{} {}", first, second);

    // Safe slicing: `get` yields `None` instead of panicking on a bad range.
    if let Some(slice) = s.get(0..3) {
        println!("{}", slice);
    } else {
        println!("无效的字符边界");
    }
}
▶ Run

panic 信息

当切片边界无效时:

thread 'main' panicked at 'byte index 1 is not a char boundary; it is inside '你' (bytes 0..3) of `你好世界`'

或者:

thread 'main' panicked at 'range end index 15 out of range for slice of length 5'

小结

  • 字符串方法:trim()、split()、contains()、replace()
  • 切片迭代:iter()、enumerate()、windows()、chunks()
  • 边界检查:超出范围会 panic,使用 get() 安全访问
  • UTF-8 安全:在字符边界切片,使用 is_char_boundary() 检查

练习题

详见:练习题