用c/c++ 编写一个 list 操作程序

介绍

本文试图通过迭代编程，一点一点改进一个关于 list 操作的程序。

基本结构

struct List {
    int value;
    struct List * next;
};

这是一个单链表结构。

#include <iostream>
using namespace std;

struct List {
    int value;
    struct List * next;
};

List * MakeList (int value, List * next) {
    return new List{value, next};
}

ostream& operator<<(ostream& out, const List* head) {
    out << "(";
    for(auto i = head; i ; i = i->next){
        out << " " << i->value;
    }
    out << ")";
    return out;
}

int main(int argc, char *argv[])
{
    List * lst = MakeList(1, MakeList(2, MakeList(3, nullptr)));
    cout << lst << endl;
    return 0;
}

输出结果

( 1 2 3)

使用构造函数

上面的程序没有使用构造函数，使用构造函数，看起来更加清晰。

#include <iostream>
using namespace std;

class List {
   public:
    List(): head_(nullptr) {};
    void push_front(int value) {
        head_ = new Cons(value, head_);
    }
    bool empty() {
        return head_ == nullptr;
    }
   private:
    struct Cons{
        Cons(int value, Cons* next) : value_(value), next_(next) {}
        int value_;
        struct Cons* next_;
    };
    Cons * head_;
    friend ostream& operator<<(ostream& out, const List& lst);
};

ostream& operator<<(ostream& out, const List& lst) {
    out << "(";
    for (auto i = lst.head_; i; i = i->next_) {
        out << " " << i->value_;
    }
    out << ")";
    return out;
}

int main(int argc, char* argv[]) {
    List lst;
    lst.push_front(1);
    lst.push_front(2);
    lst.push_front(3);
    cout << lst << endl;
    return 0;
}

输出结果

( 3 2 1)

Cons 名字来源于 Lisp 系列语言。

防止内存泄露

上面的程序明显有内存泄露，我们使用智能指针，来防止内存泄露。

#include <iostream>
#include <memory>
using namespace std;

class List {
   public:
    List() : head_(nullptr){};
    void push_front(int value) {
        head_ = make_unique<Cons>(value, std::move(head_));
    }
    bool empty() { return head_ == nullptr; }

   private:
    struct Cons {
        Cons(int value, unique_ptr<Cons>&& next)
            : value_(value), next_(std::move(next)) {}
        int value_;
        unique_ptr<Cons> next_;
    };
    unique_ptr<Cons> head_;
    friend ostream& operator<<(ostream& out, const List& lst);
};

ostream& operator<<(ostream& out, const List& lst) {
    out << "(";
    for (auto i = lst.head_.get(); i; i = i->next_.get()) {
        out << " " << i->value_;
    }
    out << ")";
    return out;
}

int main(int argc, char* argv[]) {
    List lst;
    lst.push_front(1);
    lst.push_front(2);
    lst.push_front(3);
    cout << lst << endl;
    return 0;
}

输出结果

( 3 2 1)

很遗憾， make_unique 是 c++14 的内容，如果必须使用 c++11 ，那么就需要我们自己实现一下这个函数。

#if __cplusplus <= 201103L
namespace std {
   template<typename T, typename ...Args>
   unique_ptr<T> make_unique(Args &&...args) {
       return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
   }
}

使用了 unique_ptr ，我们失去了一个功能，两个 list 不能共享同一段尾巴。也许这是一个好的交换，当两个 list 共享同一段尾巴的时候，需要使用 shared_ptr ，而 unique_ptr 的开销和裸指针一样，效率很高。在大多数情况下，我们不需要这种共享数据。

从其他 container 构造一个 list

#include <iostream>
#include <memory>
#include <vector>
using namespace std;

class List {
   public:
    List() : head_(nullptr){};

    template <typename Iter>
    List(Iter begin, Iter end) : head_() {
        for (auto it = begin; it != end; ++it) {
            push_front(*it);
        }
    }

    List(const std::initializer_list<int>& c) : List(c.begin(), c.end()) {}

    template <typename Container>
    List(const Container& c) : List(c.begin(), c.end()) {}

    void push_front(int value) {
        head_ = make_unique<Cons>(value, std::move(head_));
    }
    bool empty() { return head_ == nullptr; }

   private:
    struct Cons {
        Cons(int value, unique_ptr<Cons>&& next)
            : value_(value), next_(std::move(next)) {}
        int value_;
        unique_ptr<Cons> next_;
    };
    unique_ptr<Cons> head_;
    friend ostream& operator<<(ostream& out, const List& lst);
};

ostream& operator<<(ostream& out, const List& lst) {
    out << "(";
    for (auto i = lst.head_.get(); i; i = i->next_.get()) {
        out << " " << i->value_;
    }
    out << ")";
    return out;
}

int main(int argc, char* argv[]) {
    auto lst1 = List{1, 2, 3};
    cout << lst1 << endl;

    vector<int> v1 = {4, 5, 6};
    auto lst2 = List(v1);
    cout << lst2 << endl;
    return 0;
}

输出结果

( 3 2 1)
( 6 5 4)

这里用到了 c++11 的 delegating constructors, 也就是一个构造函数，调用另一个构造函数。

高效的 append 操作

上面的例子中，我们看到列表顺序是反的。我们要顺序的追加操作，而不是从 head 插入。

这个是写本文的出发点。前面都是铺垫。本文参考在 linux 的内核代码代码中的方法。首先我们来一个简单实现。

    void push_back(int value) {
        if (!head_) {
            head_ = make_unique<Cons>(value, nullptr);
        } else {
            Cons* p = nullptr;
            for (p = head_.get(); p->next_; p = p->next_.get()) {
            }
            p->next_ = make_unique<Cons>(value, nullptr);
        }
    }

这个方法有两个问题，一个是多了一个 if 判断语句，一个是多了 for 循环语句。好的代码风格是尽量没有if for 这些语句。性能也有一点问题，每插入一个元素，都要遍历一次列表。

linux kernel 的源代码里面用间接指针，struct Cons ** tail_ ，很巧妙的实现了这个功能。我们先简单理解没有智能指针的版本。

当列表为空的时候

tail_ == &head_;
head_ == NULL;

当有一个元素的时候。

Cons e1 = {1, nullptr};
head_ == &e1;
tail_ == &head_->next;

当有两个元素的时候。

Cons e2 = {2, nullptr};
Cons e1 = {1, &e2};
head_ == &e1;
tail_ == &head_->next->next;

依次类推。增加一个间接指针之后，当我们遍历列表的时候，我们不但知道列表元素，而且还知道我们从哪里过来的。

tail_ 永远不会是指向一个无效地址。 assert(*tail_ != nullptr);
当列表为空的时候，*tail_ == &head ，即指向 head_ 的地址。
当列表不为空的时候， *tail_ == &last.next ，即指向最后一个元素的 next 的地址。

我们要追加一个元素的时候。

(*tail_) = &new_entry;
tail_ = &new_enty.next_;

就可以了。为什么呢？

如果列表为空，(*tail_)=&new_entry 导致 head_ = &new_entry
如果列表不为空，(*tail_) = &new_entry 导致 last.next_ = &new_entry

然后，tail_ = &new_entry.next_ 保证 tail_ 一直指向列表最后一个元素的 next 的地址。

下面是带有智能指针的代码。

#include <iostream>
#include <memory>
#include <vector>
using namespace std;

class List {
   public:
    List() : head_(nullptr), tail_(&head_){};

    template <typename Iter>
    List(Iter begin, Iter end) : head_(), tail_(&head_) {
        for (auto it = begin; it != end; ++it) {
            push_back(*it);
        }
    }

    List(const std::initializer_list<int>& c) : List(c.begin(), c.end()) {}

    template <typename Container>
    List(const Container& c) : List(c.begin(), c.end()) {}

    void push_front(int value) {
        head_ = make_unique<Cons>(value, std::move(head_));
    }

    void push_back(int value) {
        (*tail_) = make_unique<Cons>(value, nullptr);
        tail_ = &(*tail_)->next_;
    }

    void remove(int value) {
        (*tail_) = make_unique<Cons>(value, nullptr);
        tail_ = &(*tail_)->next_;
    }

    bool empty() { return head_ == nullptr; }

   private:
    struct Cons {
        Cons(int value, unique_ptr<Cons>&& next)
            : value_(value), next_(std::move(next)) {}
        int value_;
        unique_ptr<Cons> next_;
    };
    unique_ptr<Cons> head_;
    unique_ptr<Cons> * tail_;
    friend ostream& operator<<(ostream& out, const List& lst);
};

ostream& operator<<(ostream& out, const List& lst) {
    out << "(";
    for (auto i = lst.head_.get(); i; i = i->next_.get()) {
        out << " " << i->value_;
    }
    out << ")";
    return out;
}

int main(int argc, char* argv[]) {
    auto lst1 = List{1, 2, 3};
    cout << lst1 << endl;

    vector<int> v1 = {4, 5, 6};
    auto lst2 = List(v1);
    cout << lst2 << endl;
    return 0;
}

输出结果

( 1 2 3)
( 4 5 6)

其中关键代码是

    void push_back(int value) {
        (*tail_) = make_unique<Cons>(value, nullptr);
        tail_ = &(*tail_)->next_;
    }

删除一个元素

先来一个普通版本。

    void remove(int value) {
        Cons * p = nullptr;
        for (auto i = head_.get(); i; p = i, i = i->next_.get()) {
                if (i->value_ == value) {
                    p->next_ = std::move(i->next_);
                    break;
                }
        }
    }

这个版本是有 bug 的，如果删除的是第一个元素，就挂了，因为 p 是 nullptr.

    void remove(int value) {
        Cons * p = nullptr;
        for (auto i = head_.get(); i; p = i, i = i->next_.get()) {
            if (i->value_ == value) {
                if (p != nullptr) {
                    p->next_ = std::move(i->next_);
                } else {
                    head_ = std::move(head_->next_);
                }
                break;
            }
        }
    }

这个版本工作，但是多了一个 if 判断，让代码看起来不是那么的帅气。利用间接指针，我们可以帅气的解决这个问题。

    void remove(int value) {
        for (auto i = &head_; *i; i = &((*i)->next_)) {
            if ((*i)->value_ == value) {
                (*i) = std::move((*i)->next_);
                break;
            }
        }
    }

这个代码为什么能工作？和 tail_ 类似，在遍历的过程中 i 是一个间接指针。

i 是指向遍历的对象指针的指针。
*i 永远不会为 nullptr
当列表为空的时候， *i 指向头指针的地址。 assert(i == &head) , assert(*i == nullptr) assert(head == nullptr
当列表不为空的时候，*i 指向前一个元素的 next 的地址。 assert(i == &previous_element.next) ，也就是说， *i 是当前元素的地址， (*i)->value 是当前元素的值。
移动到下一个元素的时候，i = &((*i)->next_ ，保证 i 一直指向前一个元素的 next 地址。
当列表不空的时候，*i = (*i)->next_ ，就是说，修改前一个元素的 next 值，让他指向当前元素的下一个元素，(*i)->next ，这样就把当前元素给跳过去了。完成了删除的操作。
当列表为空的时候，循环立即结束。

#include <iostream>
#include <memory>
#include <vector>
using namespace std;

class List {
   public:
    List() : head_(nullptr), tail_(&head_){};

    template <typename Iter>
    List(Iter begin, Iter end) : head_(), tail_(&head_) {
        for (auto it = begin; it != end; ++it) {
            push_back(*it);
        }
    }

    List(const std::initializer_list<int>& c) : List(c.begin(), c.end()) {}

    template <typename Container>
    List(const Container& c) : List(c.begin(), c.end()) {}

    void push_front(int value) {
        head_ = make_unique<Cons>(value, std::move(head_));
    }

    void push_back(int value) {
        (*tail_) = make_unique<Cons>(value, nullptr);
        tail_ = &(*tail_)->next_;
    }

    void remove(int value) {
        for (auto i = &head_; *i; i = &((*i)->next_)) {
            if ((*i)->value_ == value) {
                (*i) = std::move((*i)->next_);
                break;
            }
        }
    }

    bool empty() { return head_ == nullptr; }

   private:
    struct Cons {
        Cons(int value, unique_ptr<Cons>&& next)
            : value_(value), next_(std::move(next)) {}
        int value_;
        unique_ptr<Cons> next_;
    };
    unique_ptr<Cons> head_;
    unique_ptr<Cons> * tail_;
    friend ostream& operator<<(ostream& out, const List& lst);
};

ostream& operator<<(ostream& out, const List& lst) {
    out << "(";
    for (auto i = lst.head_.get(); i; i = i->next_.get()) {
        out << " " << i->value_;
    }
    out << ")";
    return out;
}

int main(int argc, char* argv[]) {
    auto lst1 = List{1, 2, 3};
    lst1.remove(1);
    cout << lst1 << endl;

    auto lst2 = List{1, 2, 3};
    lst2.remove(3);
    cout << lst2 << endl;

    return 0;
}

输出结果

( 2 3)
( 1 2)

TODO

如果 list 十分长，那么在调用析构函数的时候，有可能导致堆栈空间不够，爆栈了。

Wang Chunye