From dff3e7ddc501628d4e1e6b1b7e8dc16ba4becc9f Mon Sep 17 00:00:00 2001 From: CyC2018 <1029579233@qq.com> Date: Thu, 7 Jun 2018 10:40:56 +0800 Subject: [PATCH] auto commit --- notes/算法.md | 538 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 364 insertions(+), 174 deletions(-) diff --git a/notes/算法.md b/notes/算法.md index d1015dc7..72d0daa6 100644 --- a/notes/算法.md +++ b/notes/算法.md @@ -28,10 +28,11 @@ * [排序算法的比较](#排序算法的比较) * [Java 的排序算法实现](#java-的排序算法实现) * [六、查找](#六查找) + * [链表实现无序符号表](#链表实现无序符号表) * [二分查找实现有序符号表](#二分查找实现有序符号表) * [二叉查找树](#二叉查找树) * [2-3 查找树](#2-3-查找树) - * [红黑二叉查找树](#红黑二叉查找树) + * [红黑树](#红黑树) * [散列表](#散列表) * [应用](#应用) * [参考资料](#参考资料) @@ -235,8 +236,8 @@ public interface MyStack extends Iterable { ```java public class ArrayStack implements MyStack { - // 栈元素数组 - private Item[] a = (Item[]) new Object[1]; // 只能通过转型来创建泛型数组 + // 栈元素数组,只能通过转型来创建泛型数组 + private Item[] a = (Item[]) new Object[1]; // 元素数量 private int N = 0; @@ -1127,12 +1128,119 @@ Java 主要排序方法为 java.util.Arrays.sort(),对于原始数据类型使 # 六、查找 -符号表是一种存储键值对的数据结构,主要支持两种操作:插入一个新的键值对、根据给定键得到值。 +符号表(Symbol Table)是一种存储键值对的数据结构,可以支持快速查找操作。 符号表分为有序和无序两种,有序符号表主要指支持 min()、max() 等根据键的大小关系来实现的操作。 有序符号表的键需要实现 Comparable 接口。 +```java +public interface UnorderedST { + + int size(); + + Value get(Key key); + + void put(Key key, Value value); + + void delete(Key key); +} +``` + +```java +public interface OrderedST, Value> { + + int size(); + + void put(Key key, Value value); + + Value get(Key key); + + Key min(); + + Key max(); + + int rank(Key key); + + List keys(Key l, Key h); +} +``` + +## 链表实现无序符号表 + +```java +public class ListUnorderedST implements UnorderedST { + + private Node first; + + private class Node { + Key key; + Value value; + Node next; + + Node(Key key, Value value, Node next) { + this.key = key; + this.value = value; + this.next = next; + } + } + + @Override + public int size() { + int cnt = 0; + Node cur = first; + while (cur != null) { + cnt++; + cur = cur.next; + } + return cnt; + 
} + + @Override + public void put(Key key, Value value) { + Node cur = first; + // 如果在链表中找到节点的键等于 key 就更新这个节点的值为 value + while (cur != null) { + if (cur.key.equals(key)) { + cur.value = value; + return; + } + cur = cur.next; + } + // 否则使用头插法插入一个新节点 + first = new Node(key, value, first); + } + + @Override + public void delete(Key key) { + if (first == null) + return; + if (first.key.equals(key)) + first = first.next; + Node pre = first, cur = first.next; + while (cur != null) { + if (cur.key.equals(key)) { + pre.next = cur.next; + return; + } + pre = pre.next; + cur = cur.next; + } + } + + @Override + public Value get(Key key) { + Node cur = first; + while (cur != null) { + if (cur.key.equals(key)) + return cur.value; + cur = cur.next; + } + return null; + } +} +``` + ## 二分查找实现有序符号表 使用一对平行数组,一个存储键一个存储值。 @@ -1142,58 +1250,83 @@ rank() 方法至关重要,当键在表中时,它能够知道该键的位置 复杂度:二分查找最多需要 logN+1 次比较,使用二分查找实现的符号表的查找操作所需要的时间最多是对数级别的。但是插入操作需要移动数组元素,是线性级别的。 ```java -public class BinarySearchST, Value> { +public class BinarySearchOrderedST, Value> implements OrderedST { + private Key[] keys; private Value[] values; - private int N; + private int N = 0; - public BinarySearchST(int capacity) { + public BinarySearchOrderedST(int capacity) { keys = (Key[]) new Comparable[capacity]; values = (Value[]) new Object[capacity]; } + @Override public int size() { return N; } - public Value get(Key key) { - int i = rank(key); - if (i < N && keys[i].compareTo(key) == 0) { - return values[i]; - } - return null; - } - + @Override public int rank(Key key) { - int lo = 0, hi = N - 1; - while (lo <= hi) { - int mid = lo + (hi - lo) / 2; - int cmp = key.compareTo(keys[mid]); - if (cmp == 0) return mid; - else if (cmp < 0) hi = mid - 1; - else lo = mid + 1; + int l = 0, h = N - 1; + while (l <= h) { + int m = l + (h - l) / 2; + int cmp = key.compareTo(keys[m]); + if (cmp == 0) + return m; + else if (cmp < 0) + h = m - 1; + else + l = m + 1; } - return lo; + return l; } + @Override + public List keys(Key l, 
Key h) { + int index = rank(l); + List list = new ArrayList<>(); + while (keys[index].compareTo(h) <= 0) { + list.add(keys[index]); + index++; + } + return list; + } + + @Override public void put(Key key, Value value) { - int i = rank(key); - if (i < N && keys[i].compareTo(key) == 0) { - values[i] = value; + int index = rank(key); + // 如果找到已经存在的节点键位 key,就更新这个节点的值为 value + if (index < N && keys[index].compareTo(key) == 0) { + values[index] = value; return; } - for (int j = N; j > i; j--) { + // 否则在数组中插入新的节点,需要先将插入位置之后的元素都向后移动一个位置 + for (int j = N; j > index; j--) { keys[j] = keys[j - 1]; values[j] = values[j - 1]; } - keys[i] = key; - values[i] = value; + keys[index] = key; + values[index] = value; N++; } - public Key ceiling(Key key){ - int i = rank(key); - return keys[i]; + @Override + public Value get(Key key) { + int index = rank(key); + if (index < N && keys[index].compareTo(key) == 0) + return values[index]; + return null; + } + + @Override + public Key min() { + return keys[0]; + } + + @Override + public Key max() { + return keys[N - 1]; } } ``` @@ -1215,31 +1348,41 @@ BST 有一个重要性质,就是它的中序遍历结果递增排序。 基本数据结构: ```java -public class BST, Value> { - private Node root; +public class BST, Value> implements OrderedST { - private class Node { - private Key key; - private Value val; - private Node left, right; - // 以该节点为根的子树中节点总数 - private int N; + protected Node root; - public Node(Key key, Value val, int N) { + protected class Node { + Key key; + Value val; + Node left; + Node right; + // 以该节点为根的子树节点总数 + int N; + // 红黑树中使用 + boolean color; + + Node(Key key, Value val, int N) { this.key = key; this.val = val; this.N = N; } } + @Override public int size() { return size(root); } private int size(Node x) { - if (x == null) return 0; + if (x == null) + return 0; return x.N; } + + protected void recalculateSize(Node x) { + x.N = size(x.left) + size(x.right) + 1; + } } ``` @@ -1252,15 +1395,21 @@ public class BST, Value> { - 否则递归地在子树中查找:如果被查找的键较小就在左子树中查找,较大就在右子树中查找。 ```java 
+@Override public Value get(Key key) { return get(root, key); } + private Value get(Node x, Key key) { - if (x == null) return null; + if (x == null) + return null; int cmp = key.compareTo(x.key); - if (cmp == 0) return x.val; - else if (cmp < 0) return get(x.left, key); - else return get(x.right, key); + if (cmp == 0) + return x.val; + else if (cmp < 0) + return get(x.left, key); + else + return get(x.right, key); } ``` @@ -1271,16 +1420,22 @@ private Value get(Node x, Key key) {

```java -public void put(Key key, Value val) { - root = put(root, key, val); + @Override +public void put(Key key, Value value) { + root = put(root, key, value); } -private Node put(Node x, Key key, Value val) { - if (x == null) return new Node(key, val, 1); + +private Node put(Node x, Key key, Value value) { + if (x == null) + return new Node(key, value, 1); int cmp = key.compareTo(x.key); - if (cmp == 0) x.val = val; - else if (cmp < 0) x.left = put(x.left, key, val); - else x.right = put(x.right, key, val); - x.N = size(x.left) + size(x.right) + 1; + if (cmp == 0) + x.val = value; + else if (cmp < 0) + x.left = put(x.left, key, value); + else + x.right = put(x.right, key, value); + recalculateSize(x); return x; } ``` @@ -1306,20 +1461,21 @@ floor(key):小于等于键的最大键 ```java public Key floor(Key key) { Node x = floor(root, key); - if (x == null) return null; + if (x == null) + return null; return x.key; } + private Node floor(Node x, Key key) { - if (x == null) return null; + if (x == null) + return null; int cmp = key.compareTo(x.key); - if (cmp == 0) return x; - if (cmp < 0) return floor(x.left, key); - Node t = floor(x.right, key); - if (t != null) { - return t; - } else { + if (cmp == 0) return x; - } + if (cmp < 0) + return floor(x.left, key); + Node t = floor(x.right, key); + return t != null ? t : x; } ``` @@ -1332,24 +1488,37 @@ rank(key) 返回 key 的排名。 - 如果大于,递归计算在右子树中的排名,并加上左子树的节点数,再加上 1(根节点)。 ```java +@Override public int rank(Key key) { return rank(key, root); } + private int rank(Key key, Node x) { - if (x == null) return 0; + if (x == null) + return 0; int cmp = key.compareTo(x.key); - if (cmp == 0) return size(x.left); - else if (cmp < 0) return rank(key, x.left); - else return 1 + size(x.left) + rank(key, x.right); + if (cmp == 0) + return size(x.left); + else if (cmp < 0) + return rank(key, x.left); + else + return 1 + size(x.left) + rank(key, x.right); } ``` ### 6. 
min() ```java +@Override +public Key min() { + return min(root).key; +} + private Node min(Node x) { - if (x == null) return null; - if (x.left == null) return x; + if (x == null) + return null; + if (x.left == null) + return x; return min(x.left); } ``` @@ -1364,10 +1533,12 @@ private Node min(Node x) { public void deleteMin() { root = deleteMin(root); } + public Node deleteMin(Node x) { - if (x.left == null) return x.right; + if (x.left == null) + return x.right; x.left = deleteMin(x.left); - x.N = size(x.left) + size(x.right) + 1; + recalculateSize(x); return x; } ``` @@ -1379,25 +1550,29 @@ public Node deleteMin(Node x) {

- ```java public void delete(Key key) { root = delete(root, key); } private Node delete(Node x, Key key) { - if (x == null) return null; + if (x == null) + return null; int cmp = key.compareTo(x.key); - if (cmp < 0) x.left = delete(x.left, key); - else if (cmp > 0) x.right = delete(x.right, key); + if (cmp < 0) + x.left = delete(x.left, key); + else if (cmp > 0) + x.right = delete(x.right, key); else { - if (x.right == null) return x.left; - if (x.left == null) return x.right; + if (x.right == null) + return x.left; + if (x.left == null) + return x.right; Node t = x; x = min(t.right); x.right = deleteMin(t.right); x.left = t.left; } - x.N = size(x.left) + size(x.right) + 1; + recalculateSize(x); return x; } ``` @@ -1407,18 +1582,24 @@ private Node delete(Node x, Key key) { 利用二叉查找树中序遍历的结果为递增的特点。 ```java -public Iterable keys(Key lo, Key hi) { - Queue queue = new LinkedList<>(); - keys(root, queue, lo, hi); - return queue; +@Override +public List keys(Key l, Key h) { + return keys(root, l, h); } -private void keys(Node x, Queue queue, Key lo, Key hi) { - if (x == null) return; - int cmpLo = lo.compareTo(x.key); - int cmpHi = hi.compareTo(x.key); - if (cmpLo < 0) keys(x.left, queue, lo, hi); - if (cmpLo <= 0 && cmpHi >= 0) queue.add(x.key); - if (cmpHi > 0) keys(x.right, queue, lo, hi); + +private List keys(Node x, Key l, Key h) { + List list = new ArrayList<>(); + if (x == null) + return list; + int cmpL = l.compareTo(x.key); + int cmpH = h.compareTo(x.key); + if (cmpL < 0) + list.addAll(keys(x.left, l, h)); + if (cmpL <= 0 && cmpH >= 0) + list.add(x.key); + if (cmpH > 0) + list.addAll(keys(x.right, l, h)); + return list; } ``` @@ -1436,13 +1617,13 @@ private void keys(Node x, Queue queue, Key lo, Key hi) { 插入操作和 BST 的插入操作有很大区别,BST 的插入操作是先进行一次未命中的查找,然后再将节点插入到对应的空链接上。但是 2-3 查找树如果也这么做的话,那么就会破坏了平衡性。它是将新节点插入到叶子节点上。 -根据叶子节点的类型不同,有不同的处理方式。 +根据叶子节点的类型不同,有不同的处理方式: -插入到 2- 节点上,那么直接将新节点和原来的节点组成 3- 节点即可。 +- 如果插入到 2- 节点上,那么直接将新节点和原来的节点组成 3- 节点即可。

-如果是插入到 3- 节点上,就会产生一个临时 4- 节点时,需要将 4- 节点分裂成 3 个 2- 节点,并将中间的 2- 节点移到上层节点中。如果上移操作继续产生临时 4- 节点则一直进行分裂上移,直到不存在临时 4- 节点。 +- 如果是插入到 3- 节点上,就会产生一个临时 4- 节点,此时需要将 4- 节点分裂成 3 个 2- 节点,并将中间的 2- 节点移到上层节点中。如果上移操作继续产生临时 4- 节点,则一直进行分裂上移,直到不存在临时 4- 节点。

@@ -1452,7 +1633,7 @@ private void keys(Node x, Queue queue, Key lo, Key hi) { 2-3 查找树的查找和插入操作复杂度和插入顺序无关,在最坏的情况下查找和插入操作访问的节点必然不超过 logN 个,含有 10 亿个节点的 2-3 查找树最多只需要访问 30 个节点就能进行任意的查找和插入操作。 -## 红黑二叉查找树 +## 红黑树 2-3 查找树需要用到 2- 节点和 3- 节点,红黑树使用红链接来实现 3- 节点。指向一个节点的链接颜色如果为红色,那么这个节点和上层节点表示的是一个 3- 节点,而黑色则是普通链接。 @@ -1460,36 +1641,21 @@ private void keys(Node x, Queue queue, Key lo, Key hi) { 红黑树具有以下性质: -1. 红链接都为左链接; -2. 完美黑色平衡,即任意空链接到根节点的路径上的黑链接数量相同。 +- 红链接都为左链接; +- 完美黑色平衡,即任意空链接到根节点的路径上的黑链接数量相同。 画红黑树时可以将红链接画平。

```java -public class RedBlackBST, Value> { - private Node root; +public class RedBlackBST, Value> extends BST { private static final boolean RED = true; private static final boolean BLACK = false; - private class Node { - Key key; - Value val; - Node left, right; - int N; - boolean color; - - Node(Key key, Value val, int n, boolean color) { - this.key = key; - this.val = val; - N = n; - this.color = color; - } - } - private boolean isRed(Node x) { - if (x == null) return false; + if (x == null) + return false; return x.color == RED; } } @@ -1509,7 +1675,7 @@ public Node rotateLeft(Node h) { x.color = h.color; h.color = RED; x.N = h.N; - h.N = 1 + size(h.left) + size(h.right); + recalculateSize(h); return x; } ``` @@ -1527,7 +1693,7 @@ public Node rotateRight(Node h) { x.color = h.color; h.color = RED; x.N = h.N; - h.N = 1 + size(h.left) + size(h.right); + recalculateSize(h); return x; } ``` @@ -1539,7 +1705,7 @@ public Node rotateRight(Node h) {

```java -void flipColors(Node h){ +void flipColors(Node h) { h.color = RED; h.left.color = BLACK; h.right.color = BLACK; @@ -1557,23 +1723,34 @@ void flipColors(Node h){

```java -public void put(Key key, Value val) { - root = put(root, key, val); +@Override +public void put(Key key, Value value) { + root = put(root, key, value); root.color = BLACK; } -private Node put(Node x, Key key, Value val) { - if (x == null) return new Node(key, val, 1, RED); +private Node put(Node x, Key key, Value value) { + if (x == null) { + Node node = new Node(key, value, 1); + node.color = RED; + return node; + } int cmp = key.compareTo(x.key); - if (cmp == 0) x.val = val; - else if (cmp < 0) x.left = put(x.left, key, val); - else x.right = put(x.right, key, val); + if (cmp == 0) + x.val = value; + else if (cmp < 0) + x.left = put(x.left, key, value); + else + x.right = put(x.right, key, value); - if (isRed(x.right) && !isRed(x.left)) x = rotateLeft(x); - if (isRed(x.left) && isRed(x.left.left)) x = rotateRight(x); - if (isRed(x.left) && isRed(x.right)) flipColors(x); + if (isRed(x.right) && !isRed(x.left)) + x = rotateLeft(x); + if (isRed(x.left) && isRed(x.left.left)) + x = rotateRight(x); + if (isRed(x.left) && isRed(x.right)) + flipColors(x); - x.N = size(x.left) + size(x.right) + 1; + recalculateSize(x); return x; } ``` @@ -1598,13 +1775,13 @@ private Node put(Node x, Key key, Value val) { 对于一个大小为 M 的散列表,散列函数能够把任意键转换为 [0, M-1] 内的正整数,该正整数即为 hash 值。 -散列表有冲突的存在,也就是两个不同的键可能有相同的 hash 值。 +散列表存在冲突,也就是两个不同的键可能有相同的 hash 值。 散列函数应该满足以下三个条件: -1. 一致性:相等的键应当有相等的 hash 值,两个键相等表示调用 equals() 返回的值相等。 -2. 高效性:计算应当简便,有必要的话可以把 hash 值缓存起来,在调用 hash 函数时直接返回。 -3. 
均匀性:所有键的 hash 值应当均匀地分布到 [0, M-1] 之间,这个条件至关重要,直接影响到散列表的性能。 +- 一致性:相等的键应当有相等的 hash 值,两个键相等表示调用 equals() 返回的值相等。 +- 高效性:计算应当简便,有必要的话可以把 hash 值缓存起来,在调用 hash 函数时直接返回。 +- 均匀性:所有键的 hash 值应当均匀地分布到 [0, M-1] 之间,这个条件至关重要,直接影响到散列表的性能。 除留余数法可以将整数散列到 [0, M-1] 之间,例如一个正整数 k,计算 k%M 既可得到一个 [0, M-1] 之间的 hash 值。注意 M 必须是一个素数,否则无法利用键包含的所有信息。例如 M 为 10k,那么只能利用键的后 k 位。 @@ -1616,7 +1793,7 @@ private Node put(Node x, Key key, Value val) { ```java int hash = 0; -for(int i = 0; i < s.length(); i++) +for (int i = 0; i < s.length(); i++) hash = (R * hash + s.charAt(i)) % M; ``` @@ -1637,16 +1814,23 @@ int hash = (x.hashCode() & 0x7fffffff) % M; 使用 Java 自带的 HashMap 等自带的哈希表实现时,只需要去实现 Key 类型的 hashCode() 函数即可。Java 规定 hashCode() 能够将键均匀分布于所有的 32 位整数,Java 中的 String、Integer 等对象的 hashCode() 都能实现这一点。以下展示了自定义类型如何实现 hashCode()。 ```java -public class Transaction{ +public class Transaction { private final String who; private final Date when; private final double amount; - public int hashCode(){ + public Transaction(String who, Date when, double amount) { + this.who = who; + this.when = when; + this.amount = amount; + } + + public int hashCode() { int hash = 17; - hash = 31 * hash + who.hashCode(); - hash = 31 * hash + when.hashCode(); - hash = 31 * hash + ((Double) amount).hashCode(); + int R = 31; + hash = R * hash + who.hashCode(); + hash = R * hash + when.hashCode(); + hash = R * hash + ((Double) amount).hashCode(); return hash; } } @@ -1667,11 +1851,11 @@ public class Transaction{

```java -public class LinearProbingHashST { - private int N; +public class LinearProbingHashST implements UnorderedST { + private int N = 0; private int M = 16; private Key[] keys; - private Value[] vals; + private Value[] values; public LinearProbingHashST() { init(); @@ -1684,7 +1868,7 @@ public class LinearProbingHashST { private void init() { keys = (Key[]) new Object[M]; - vals = (Value[]) new Object[M]; + values = (Value[]) new Object[M]; } private int hash(Key key) { @@ -1697,11 +1881,10 @@ public class LinearProbingHashST { ```java public Value get(Key key) { - for (int i = hash(key); keys[i] != null; i = (i + 1) % M) { - if (keys[i].equals(key)) { - return vals[i]; - } - } + for (int i = hash(key); keys[i] != null; i = (i + 1) % M) + if (keys[i].equals(key)) + return values[i]; + return null; } ``` @@ -1709,18 +1892,22 @@ public Value get(Key key) { **(二)插入** ```java -public void put(Key key, Value val) { +public void put(Key key, Value value) { + resize(); + putInternal(key, value); +} + +private void putInternal(Key key, Value value) { int i; - for (i = hash(key); keys[i] != null; i = (i + 1) % M) { + for (i = hash(key); keys[i] != null; i = (i + 1) % M) if (keys[i].equals(key)) { - vals[i] = val; + values[i] = value; return; } - } + keys[i] = key; - vals[i] = val; + values[i] = value; N++; - resize(); } ``` @@ -1730,21 +1917,26 @@ public void put(Key key, Value val) { ```java public void delete(Key key) { - if (!contains(key)) return; int i = hash(key); - while (!key.equals(keys[i])) { + while (keys[i] != null && !key.equals(keys[i])) i = (i + 1) % M; - } + + // 不存在,直接返回 + if (keys[i] == null) + return; + keys[i] = null; - vals[i] = null; + values[i] = null; + + // 将之后相连的键值对重新插入 i = (i + 1) % M; while (keys[i] != null) { Key keyToRedo = keys[i]; - Value valToRedo = vals[i]; + Value valToRedo = values[i]; keys[i] = null; - vals[i] = null; + values[i] = null; N--; - put(keyToRedo, valToRedo); + putInternal(keyToRedo, valToRedo); i = (i + 1) % M; } N--; @@ 
-1764,19 +1956,20 @@ public void delete(Key key) { ```java private void resize() { - if (N >= M / 2) resize(2 * M); - else if (N <= M / 8) resize(M / 2); + if (N >= M / 2) + resize(2 * M); + else if (N <= M / 8) + resize(M / 2); } private void resize(int cap) { - LinearProbingHashST t = new LinearProbingHashST<>(cap); - for (int i = 0; i < M; i++) { - if (keys[i] != null) { - t.put(keys[i], vals[i]); - } - } + LinearProbingHashST t = new LinearProbingHashST(cap); + for (int i = 0; i < M; i++) + if (keys[i] != null) + t.putInternal(keys[i], values[i]); + keys = t.keys; - vals = t.vals; + values = t.values; M = t.M; } ``` @@ -1814,11 +2007,9 @@ public class SparseVector { public SparseVector(double[] vector) { hashMap = new HashMap<>(); - for (int i = 0; i < vector.length; i++) { - if (vector[i] != 0) { + for (int i = 0; i < vector.length; i++) + if (vector[i] != 0) hashMap.put(i, vector[i]); - } - } } public double get(int i) { @@ -1827,9 +2018,8 @@ public class SparseVector { public double dot(SparseVector other) { double sum = 0; - for (int i : hashMap.keySet()) { + for (int i : hashMap.keySet()) sum += this.get(i) * other.get(i); - } return sum; } }