Leetcode: Palindrome Number

Problem

Leetcode link for this question
Determine whether an integer is a palindrome. Do this without extra space.

Analyze

This is the previous post on this question
This is easy question, just find the most left digit and then compare each digit from left-right ends to center.

Code

C++ code accept by Leetcode, which exclude negative number like -121 as palindrome

class Solution {
public:
    bool isPalindrome(int x) {
        if(x < 0)
            return false;
        int highDigit = 1;
        while(x / highDigit >= 10)
            highDigit *= 10;
        int lowDigit = 1;
        while(highDigit > lowDigit){
            int high = (x / highDigit) % 10;
            int low = (x/lowDigit) % 10;
            if(high != low)
                return false;
            highDigit /= 10;
            lowDigit *= 10;
        }
        return true;
    }
};

C++ code for who consider negative number like -121 also as palindrome

class Solution {
public:
    bool isPalindrome(int x) {
        int highDigit = 1;
        while(x / highDigit >= 10 || x / highDigit <= -10)
            highDigit *= 10;
        int lowDigit = 1;
        while(highDigit > lowDigit){
            int high = (x / highDigit) % 10;
            int low = (x/lowDigit) % 10;
            if(high != low)
                return false;
            highDigit /= 10;
            lowDigit *= 10;
        }
        return true;
    }
};

Leetcode: String to Integer (atoi)

Problem

Leetcode link
Implement atoi to convert a string to an integer.

Analyze

This is the previous answer to this question

The complexity of this problem is how to handle edge cases.
For int, standard as 32-bit length, has a max value of (1<<31) - 1 and a min value of (1<<31).
When read each digit from string, need check if the result will be overflow.
And another situation is that the leading blank spaces, and possibly has + or -.

Code

class Solution {
public:
    const int max = (1<<31) - 1;
    const int min = -max - 1;

    inline bool willOverflow(int sign, int base, int digit) {
        static const int maxTenth = max / 10;
        static const int maxDigit = max % 10;
        static const int minTenth = - (min / 10);
        static const int minDigit = - (min % 10);
        if(sign == 1){
            return (base > maxTenth || base == maxTenth && digit > maxDigit);
        } else {
            return (base > minTenth || base == minTenth && digit > minDigit);
        }
    }

    // The test case only allow "    -123" like this, after hit sign or digit, can not take any invalid or blank char.
    int myAtoi(string str) {
        if (str.size() == 0) return 0;
        int sign = 1;
        bool gotNumber = false; //allow blank space before number
        bool gotSign = false; // allow <= 1 sign
        int result = 0;
        for(int i = 0; i < str.size(); ++i) {
            if(!gotNumber && str[i] == ' ') continue; 
            if(!gotNumber && !gotSign && (str[i] == '+' || str[i] == '-')){
                sign = str[i]=='-' ? -1 : 1;
                gotSign = true;
                gotNumber = true;
                continue;
            }
            if(str[i] < '0' || str[i] > '9')
                return result * sign;
            gotNumber = true;
            int digit = str[i] - '0';
            if(willOverflow(sign, result, digit))
                return sign == 1 ? max : min;
            result = result * 10 + digit;
        }
        return result * sign;
    }
};

Leetcode: Contains Duplicate II

Question

Contains Duplicate II
Given an array of integers and an integer k, find out whether there are two distinct indices i and j in the array such that nums[i] = nums[j] and the difference between i and j is at most k.

Analyze

It’s strait forward to using dictionary on this question. First run, save position of each number in the dictionary; Second run, check the dictionary to see if there is another duplication in this array and check the position of it.
But there is a situation that need take into consideration, that the array may contain the duplication multiple times. To deal with this, just checking in the first run, to see if the last position (if any) is close enough.

Code

C++ code

class Solution {
public:
    inline int abs(int v) {
        return v < 0 ? -v : v;
    }
    bool containsNearbyDuplicate(vector<int>& nums, int k) {
        map<int, int> dict;
        for(int i=0; i<nums.size(); ++i) {
            if(dict.find(nums[i]) != dict.end()) {
                if(abs(dict[nums[i]]-i) <= k)
                    return true;
            }
            dict[nums[i]]=i;
        }
        for(int j=0; j<nums.size(); ++j){
            map<int,int>::iterator it = dict.find(nums[j]);
            map<int,int>::iterator endit = dict.end();
            if(endit != it){
                int i = it->second;
                if(i != j && abs(i-j) <= k)
                    return true;
            }
        }
        return false;
    }
};

Leetcode: Integer to Roman

Question:

Integer to Roman
Given an integer, convert it to a roman numeral.
Input is guaranteed to be within the range from 1 to 3999.

Analyze:

Just handle each digit in the number.
For numbers from [0, 9], they are mapping to:
0 – ‘ ‘
1 – ‘I’
2 – ‘II’
3 – ‘III’
4 – ‘IV’
5 – ‘V’
6 – ‘VI’
7 – ‘VII’
8 – ‘VIII’
9 – ‘IX’

The patten is that for digit in [0,3], repeat ‘I’ digit times;
for digit == 4, ‘IV’
for digit in [5, 8], put digit-5 times ‘I’ after ‘V’;
for digit == 9, ‘IX’

Then from 10 – 90, using ‘X’, ‘L’, ‘C’ instead of ‘I’, ‘V’, ‘X’. So if we store all the available chars in an array, it just move the base index += 2 each time we handle next digit.

Code

C++ Code

char* intToRoman(int num) {
    char chars[] = {'I', 'V', 'X', 'L', 'C', 'D', 'M', 'C', 'D'};
    char * result = malloc(100);
    if(num <= 0 || num >= 4000)
        return result;

    int tail = 100;
    int charBase = 0;
    while(num != 0)
    {
        int digit = num % 10;
        char temp[3];
        int templen = 0;
        if(digit <= 3)
        {
            for(int i = 1; i <= digit; ++i)
            {
                temp[i-1] = chars[charBase];
            }
            templen = digit;
        } 
        else if (digit == 4)
        {
            temp[0] = chars[charBase];
            temp[1] = chars[charBase + 1];
            templen = 2;
        }
        else if(digit <= 8)
        {
            temp[0] = chars[charBase + 1];
            templen = 1;
            for(int i = 6; i <= digit; ++i)
            {
                temp[i-5] = chars[charBase];
                templen += 1;
            }
        } 
        else
        {
            temp[0] = chars[charBase];
            temp[1] = chars[charBase + 2];
            templen = 2;
        }

        //copy temp to result
        tail = tail - templen;
        for(int i = 0; i < templen; ++i)
        {
            result[tail + i] = temp[i];
        }

        charBase += 2;
        num /= 10;
    }

    //move result to start
    for(int i = tail; i < 100; ++i) {
        result[i-tail] = result[i];
        result[i] = 0;
    }

    return result;
}

LeetCode: Reverse Integer

Previous Post

This is the previous post on same question.

Question

LeetCode Link
Reverse digits of an integer.
Example1: x = 123, return 321
Example2: x = -123, return -321

Analyze

Careful about the overflow. For ruby, it’s not a problem. For c++, need more consideration.

Code

Ruby Code

# @param {Integer} x
# @return {Integer}
def reverse(x)
    sign = x >= 0 ? 1 : -1
    x = x.abs
    y = 0
    int_max = 2 ** 31 - 1
    while x > 0 do
        digit = x % 10
        y = y * 10 + digit
        if y > int_max
            y = 0
            break
        end
        x /= 10
    end
    y * sign
end

C++ Code

class Solution {
public:
    int reverse(int x) {
        int sign = 1;
        if(x < 0) {
            sign = -1;
            x = -x;
        }
        int int_max = (1 << 31) - 1; //notice the priority of << is lower than -
        int dime = int_max / 10;
        int tail = int_max % 10 + (sign == 1 ? 0 : 1);

        int y = 0;
        while(x > 0){
            int digit = x % 10;
            if(y > dime || (y == dime && digit > tail)){
                return 0;
            }
            y = y * 10 + digit;
            x /= 10;
        }
        return y * sign;
    }
};

LeetCode: ZigZag Conversion

Previous post

This is the old post on this question, using another method.

Question

Leetcode Link
The string “PAYPALISHIRING” is written in a zigzag pattern on a given number of rows like this: (you may want to display this pattern in a fixed font for better legibility)

P   A   H   N
A P L S I I G
Y   I   R

And then read line by line: “PAHNAPLSIIGYIR”
Write the code that will take a string and make this conversion given a number of rows:

string convert(string text, int nRows);
convert("PAYPALISHIRING", 3) //should return "PAHNAPLSIIGYIR".

Analyze

This question is simple, one method is that build strings for each row, and for each char in the string, determine the target row of the position based on the total row count, and append it to the target string. Then concat all the strings together in the last.

Code

The fast ruby version

# @param {String} s
# @param {Integer} num_rows
# @return {String}
def convert(s, num_rows)
    result = []
    tail_size = num_rows > 2 ? num_rows - 2 : 0
    group_size = num_rows + tail_size

    s.length.times do |i|
        in_group_position = i % group_size
        target_row = if in_group_position < num_rows
            in_group_position
        else
            num_rows - 1 - (in_group_position - num_rows)
        end
        result[target_row] ||= ""
        result[target_row] << s[i]
    end

    result
end

Screen Shot 2016-05-04 at 16.20.51

LeetCode Problem: Maximum Depth of Binary Tree

Recursively count the depth of tree node. One node’s depth is the maximum depth of it’s children’s depth + 1 or 0 if node is NULL.
递归计算就可以了,如果是空节点,深度是0,否则这个节点的子树的最大深度是其子节点最大深度 + 1。



Maximum Depth of Binary Tree Sep 30 ’12
Given a binary tree, find its maximum depth.
The maximum depth is the number of nodes along the longest path from the root node down to the farthest leaf node.



Code, 64ms pass large set

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    int maxDepth(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if(root == NULL) return 0;
        int leftdep = maxDepth(root->left);
        int rightdep = maxDepth(root->right);
        return 1 + std::max(leftdep,rightdep);
    }
};

LeetCode Problem: Valid Palindrome

To solving this problem, using two pointer to track the characters in the string. i, from the beginning of string; j, from the back of the string.
这个问题简单,只需要收尾两个指针相向而行,比较是否相等就可以了,过程中跳过不是字母也不是数字的字符。算法如下:

while i < j do {
  1.if s[i] is not alphanumeric, moving i forward and continue
  2.if s[j] is not alphanumeric, moving j backward and condinue
  3.if s[i] != s[j] (ignoring the cases) return false.
  4.moving i forward and j backward by one step
}
return true;



Valid Palindrome Jan 13
Given a string, determine if it is a palindrome, considering only alphanumeric characters and ignoring cases.
For example,
"A man, a plan, a canal: Panama" is a palindrome.
"race a car" is not a palindrome.
Note:
Have you consider that the string might be empty? This is a good question to ask during an interview.
For the purpose of this problem, we define empty string as valid palindrome.



Code, 60ms pass large set

class Solution {
    inline bool isAlpha(char c) {
        if(c >= 'a' && c <= 'z') return true;
        if(c >= 'A' && c <= 'Z') return true;
        if(c >= '0' && c <= '9') return true;
        return false;
    }
    inline char lower(char c) {
        if(c >= 'A' && c <= 'Z')
            return ('a' + (c-'A'));
        return c;
    }
public:
    bool isPalindrome(string s) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        int i = 0, j = s.size() - 1;
        while(i < j) {
            if(!isAlpha(s[i])) {
                ++i;
                continue;
            }
            if(!isAlpha(s[j])) {
                --j;
                continue;
            }
            if(lower(s[i++]) != lower(s[j--])) {
                return false;
            }
        }
        return true;
    }
};



Code rewrite at 2013-2-4

class Solution {
    bool isValidChar(char c) {
        if (c >= '0' && c <= '9')
            return true;
        if (c >= 'a' && c <= 'z')
            return true;
        if (c >= 'A' && c <= 'Z') {
            return true;
        }
        return false;
    }
    char lower(char c) {
        if (c >= 'A' && c <= 'Z') {
            return 'a' + c - 'A';
        }
    }
public:
    bool isPalindrome(string s) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if(s.size() <= 1) return true;
        int si=0,ei=s.size()-1;
        while(si < ei) {
            while(si < ei && !isValidChar(s[si]))
                ++si;
            while(si < ei && !isValidChar(s[ei]))
                --ei;
            if(lower(s[si]) != lower(s[ei])) {
                return false;
            }
            ++si;--ei;
        }
        return true;
    }
};



Code rewrite at 2013-03-03

class Solution {
    bool isValid(char c) {
        if (c >= '0' && c <= '9')
            return true;
        if (c >= 'a' && c <= 'z')
            return true;
        if (c >= 'A' && c <= 'Z') {
            return true;
        }
        return false;
    }
    char lower(char c) {
        if (c >= 'A' && c <= 'Z') {
            return 'a' + c - 'A';
        }
    }
public:
    bool isPalindrome(string s) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if(s.size() <= 1) return true;
        int si=0,ei=s.size()-1;
        while(si < ei) {
            if(!isValid(s[si])) {
                ++si;
                continue;
            }
            if(!isValid(s[ei])) {
                --ei;
                continue;
            }
            if(lower(s[si++]) != lower(s[ei--])) {
                return false;
            }
        }
        return true;
    }
};

LeetCode Problem: Pascal’s Triangle II

Follow the algorithm in LeetCode Problem: Pascal’s Triangle, we can simple return the required row from the result.
从上一个题的算法LeetCode Problem: Pascal’s Triangle,可以得到一个简单的方法就是,在上一题的基础上,最后返回需要的行即可。

But, we can optimize this to use constant extra space, even better than the required O(k) extra space in the problem description below.
题目要求最好能用O(k)的额外空间,但是我们可以做的更好,甚至只用常数的额外空间就好了。

 [1,3,3,1],
 [1,4,6,4,1]

Take the above two rows as example, we can see that, the element in the last row is only relative to the element above it and the one before the element above it. So we can generate the last row from the previous row in backward order in place.Thus, we do not need the extra spaces.
拿上面这两行为例,每一个元素其实只与它上面一个和左上那一个元素相关。也就是说,如果要生成元素的下标是i的话,它只和i和i-1相关。所以我们可以用从后往前的顺序原地生成后一行。这样就不用任何辅助空间了。



Pascal’s Triangle II Oct 29 ’12
Given an index k, return the kth row of the Pascal’s triangle.
For example, given k = 3,
Return [1,3,3,1].
Note:
Could you optimize your algorithm to use only O(k) extra space?



Code, 16ms pass large test set

class Solution {
public:
    vector getRow(int rowIndex) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        vector ret;
        if(rowIndex < 0) return ret;
        for(int ir = 0; ir <= rowIndex; ++ir) {
            //put an 1 at the end of this row
            ret.push_back(1);
            //handle the rest of elements backward
            for(int ic = ir - 1; ic > 0; --ic) {
                ret[ic] = ret[ic] + ret[ic - 1];
            }
        }
        return ret;
    }
};

LeetCode Problem: Pascal’s Triangle

The problem is simple, each element in the triangle is the sum of two numbers above it.
这个题目简单,三角形中每一个元素的值是其上面两个值的和。

So, just build the rows one by one according the row above it. And the first row is {1}.
根据当前行的上面一层数字,就可以逐行生成整个三角形了。初值是第一行:{1}



Pascal’s Triangle Oct 28 ’12
Given numRows, generate the first numRows of Pascal’s triangle.
For example, given numRows = 5,
Return

[
     [1],
    [1,1],
   [1,2,1],
  [1,3,3,1],
 [1,4,6,4,1]
]



Code, 12ms pass large test set

class Solution {
public:
    vector > generate(int numRows) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        vector >ret;
        if(numRows == 0) return ret;
        //first row
        ret.push_back(vector(1,1));
        //rest rows;
        for(int nr = 2; nr <= numRows; ++nr) {
            vector thisrow(nr,1);
            vector &lastrow = ret[nr-2];
            for(int ic = 1; ic < nr - 1; ++ic) {
                thisrow[ic] = lastrow[ic-1] + lastrow[ic];
            }
            ret.push_back(thisrow);
        }
        return ret;
    }
};

Red-Balck Tree

红黑树
是一颗比较平衡的二叉搜索树,算法复杂,但是保证了各种操作最差情况下是O(logn)

定义
除了普通二叉搜索树的要求以外,还有几个其它的要求:
1·根节点是黑色的
2·不能有两个相邻的红色节点,(父子不同红色)
3·叶节点是黑色的(这里定义的叶节点是指空节点,不包含数字)
4·根节点到所有叶节点的各条路径上的黑色节点数目是相等的
5·所有节点,非红即黑

性质
最长和最短的根节点到叶节点的路径长度只差不超过2倍。
所有操作都可以在O(logn)时间内完成。

查找操作
查找操作和普通二叉搜索树是一样的

插入操作
按照普通二叉搜索树的方法,将要插入的节点插入到二叉树里面,所有的节点在插入的时候都涂成红色,然后分情况进行处理
1·如果插入的节点没有父节点(即刚插入的节点是根节点),直接涂成黑色就搞定。

void insert_case1(node n) {
     if (n->parent == NULL)
         n->color = BLACK;
     else
         insert_case2(n);
 }

2·设插入的节点的父节点为F,如果F是黑色的,就什么也不用做。

void insert_case2(node n) {
     if (n->parent->color == BLACK)
         return; /* 树仍旧有效 */
     else
         insert_case3(n);
 }

3·否则的话,F是红色,那么F不是根节点,所以F必然有父节点,设父节点的父节点是G(必然是黑色)。设父节点的兄弟节点是U。如果U存在并且F和U都是红色的,那么就将FU涂成黑色,G涂成红色。然后把G当做新加入的节点从第1步开始依次查看。

 void insert_case3(node n) {
     if (uncle(n) != NULL && uncle(n)->color == RED) {
         n->parent->color = BLACK;
         uncle(n)->color = BLACK;
         grandparent(n)->color = RED;
         insert_case1(grandparent(n));
     }
     else
         insert_case4(n);
 }

(45两步都假设F是G的左子结点,如果F是G的右子结点,那么下面的操作都反向)
4·否则,要么U不存在,要么U是黑色。如果新节点N是F的左子节点,转5;否则对F做一次左旋转,并且交换新节点N和F的角色,进行第5步处理
(这一步有点诡异,怎么会出现黑色的U呢?因为在加入N之前,G(黑)-F(红)-Leaf(黑) = 2黑,G(黑)-U(黑)-…-Leaf(黑) >=3 黑,不可能啊。后来想明白了,这种情况只可能是由于第3步中交换颜色之后导致的情况。)

void insert_case4(node n) {
     if (n == n->parent->right && n->parent == grandparent(n)->left) {
         rotate_left(n->parent);
         n = n->left;
     } else if (n == n->parent->left && n->parent == grandparent(n)->right) {
         rotate_right(n->parent);
         n = n->right;
     }
     insert_case5(n);
 }

5·这种情况下,N是F的左子结点,且NF都是红色,G是黑色,U是黑色。此时对G做一次右旋转,且交换G和U的颜色,搞定。

 void insert_case5(node n) {
     n->parent->color = BLACK;
     grandparent(n)->color = RED;
     if (n == n->parent->left && n->parent == grandparent(n)->left) {
         rotate_right(grandparent(n));
     } else {
         /* Here, n == n->parent->right && n->parent == grandparent(n)->right */
         rotate_left(grandparent(n));
     }
 }

删除操作
(未完待续…)



Reference
http://blog.csdn.net/longerzone/article/details/7817533
http://zh.wikipedia.org/wiki/红黑树

Algorithm Problem: Find Out the Minimum Number that Great or Equal to a Given Number In BST

Given a BST and a Number k, find out the minimum number p that p >= k.
给定一个BST的根,以及一个数k,返回在BST中的不小于k的最小的节点。

The method is descriped below, in the code.
具体方法在代码中注释得很清楚了.

The recursive and non-recursive code is included.
下面的代码中包含了递归和非递归两种.


struct TreeNode {
    int val;
    TreeNode *left;
    TreeNode *right;
};
TreeNode *findKRecursive(TreeNode *root, int k) {
    if (NULL == root) {
        //there is no node that match the requisite.
        //在这颗空子树中是找不到目标的
        return NULL;
    }
    if (root->val == k) {
        //the node equal to k is find
        //找到了正好等于k的节点,这就是全局要找的节点。
        return root;
    } else if (root->val < k) {
        //root is less than k, so the result node is in it's right sub-tree, if there is.
        //如果根节点的值小于k,那么唯一的满足条件的节点只能去右子树中查找,如果右子树中没有,那整棵树也没有。
        return findKRecursive(root->right, k);
    } else {
        //root is > k, so the target node may be in it's left sub-tree, or is the current node.
        //如果根节点的值>k,那么值更小的满足要求的节点可能出现在左子树中
        TreeNode *knode = findKRecursive(root->left, k);
        if (knode) {
            //if a node is found in left sub-tree, then knode->value must <= cur->value, so return it
            //如果左子树中确实找到了一个节点满足要求,那么他的值一定不大于当前节点的值
            return knode;
        } else {
            //or there is no node which value is >= k, then , root is the best choice for the result.
            //否则左子树中没有>=k的节点,那最小的满足>=k要求的节点就是root了。
            return root;
        }
    }
}

TreeNode *findK(TreeNode *root, int k) {
    TreeNode *lastFound = NULL;//record the last node found which value is >= k
    TreeNode *cur = root;//current node to search for.
    while (cur) {
        if (cur->val == k) {
            //the node equal to k is find
            //找到了正好等于k的节点,这就是全局要找的节点。
            return cur;
        } else if (cur->val > k) {
            //root is > k, so the result may be in it's left sub-tree, or cur is the best result
            //如果根节点的值>k,那么值更小的满足要求的节点可能出现在左子树中,如果左子树没有的话,这就是最好的选择
            lastFound = cur;
            cur = cur->left;
        } else {
            //root is less than k, so the result node is in it's right sub-tree, if there is.
            //如果根节点的值小于k,那么唯一的满足条件的节点只能去右子树中查找,如果右子树中没有,那整棵树也没有。
            cur = cur->right;
        }
    }
    return lastFound;
}

LeetCode Problem: Populating Next Right Pointers in Each Node II

The code in the previous article LeetCode Problem: Populating Next Right Pointers in Each Node, Level traversal of binary tree is also adapted to this situation.
上一题LeetCode Problem: Populating Next Right Pointers in Each Node, Level traversal of binary tree的答案对这个题目仍然是适用的。



Populating Next Right Pointers in Each Node II
Follow up for problem “Populating Next Right Pointers in Each Node”.
What if the given tree could be any binary tree? Would your previous solution still work?
Note:
You may only use constant extra space.
For example,
Given the following binary tree,

         1
       /  \
      2    3
     / \    \
    4   5    7

After calling your function, the tree should look like:

         1 -> NULL
       /  \
      2 -> 3 -> NULL
     / \    \
    4-> 5 -> 7 -> NULL

LeetCode Problem: Populating Next Right Pointers in Each Node, Level traversal of binary tree

It’s easy to doing this by using a queue, doing a level traversal of binary tree.
题目相对简单,用一个队列来进行广度优先遍历就可以了,用一个NULL来指示每一层的结束。



Populating Next Right Pointers in Each Node
Given a binary tree
struct TreeLinkNode {
TreeLinkNode *left;
TreeLinkNode *right;
TreeLinkNode *next;
}
Populate each next pointer to point to its next right node. If there is no next right node, the next pointer should be set to NULL.
Initially, all next pointers are set to NULL.
Note:
You may only use constant extra space.
You may assume that it is a perfect binary tree (ie, all leaves are at the same level, and every parent has two children).
For example,
Given the following perfect binary tree,

         1
       /  \
      2    3
     / \  / \
    4  5  6  7

After calling your function, the tree should look like:

         1 -> NULL
       /  \
      2 -> 3 -> NULL
     / \  / \
    4->5->6->7 -> NULL



Code, 160ms pass the large test set.

/**
 * Definition for binary tree with next pointer.
 * struct TreeLinkNode {
 *  int val;
 *  TreeLinkNode *left, *right, *next;
 *  TreeLinkNode(int x) : val(x), left(NULL), right(NULL), next(NULL) {}
 * };
 */
class Solution {
public:
    void connect(TreeLinkNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        queue q;
        if(root) {
            q.push(root);
            q.push(NULL);
        }
        while(q.size()) {
            TreeLinkNode *cur = q.front();
            q.pop();
            if(cur) {
                cur->next = q.front();
                if(cur->left) q.push(cur->left);
                if(cur->right) q.push(cur->right);
            } else {
                if (q.size()) q.push(NULL);
            }
        }
    }
};



Code rewrite at 2013-1-19, more simple

class Solution {
public:
    void connect(TreeLinkNode *root) {
        queue q;
        q.push(root);
        q.push(NULL);
        while(true) {
            TreeLinkNode *cur = q.front();
            q.pop();
            if(cur) {
                cur->next = q.front();
                if(cur->left) q.push(cur->left);
                if(cur->right) q.push(cur->right);
            } else {
                if (q.size() == 0 || q.front() == NULL) return;
                q.push(NULL);
            }
        }
    }
};

Algorithm Problem: Longest Non-Descending Sub Array or Logest Increasing Subarray, Dynamic programming

The ordinary dynamic programing gives our an algorithm of time complexity O(n2).
However, we could achieve the result by O(n * logn).
Inorder to do this, we need an array named min4length to hold the minimum value in array for the last number of the subarray in each length we could get. For example, given array {1,2,0}, the first run min4len = {1}, second {1,2}, third {0,2}.
At each step i from 0 to array.size() – 1, we find out the position l in min4length that satisfy that min4len[l] > array[i], and all the values before l is small than array[i], if exists. that is min4len[j] < array[i],j < l.
This step we could do in O(log n) time by binary search.
Then if the l is equal to min4len.size(), that indicated that we get to new length record, just push the value array[i] into min4len.
Else, update the min4len[l] = array[i], if min4len[l] > array[i]. And this step will NOT break the non-decreasing attribute of the array min4len, so we could do binary search again.
At last, the min4len.size() is the max length we could achieve.



If we need to output all the values in the final sub-array, it’s much harder, we have to keep track for each length of sub-array. see the code below in function:longestNonDecreasingSubArray()



Problem
Given an array with integers, find the longest non-descending sub-array of the given array.
For example:
Given: {1,2,3,-1,0,1,2,3,4,-1}
Should find out: {-1,0,1,2,3,4} of length 6.



Codes

<

pre>
//
// main.cpp
// LongestIncreasingSubarray
//
// Created by Qiu Xiangyu on 12-12-23.
// Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

include

include

using namespace std;

void printArray(vector array) {
for (size_t i = 0; i < array.size(); ++i) {
if (i > 0) {
cout<<“,”;
}
cout<<array[i];
}
cout<<endl;
}

int lengthOfLongestNonDecreasingSubArray(vector &array) {
if (array.size() == 0) {
return 0;
}
vector min4Length;
for (int i = 0; i < array.size(); ++i) {
int l = 0,h = (int)min4Length.size() – 1;
while (l <= h) {
int mid = (l + h) / 2;
if (min4Length[mid] <= array[i]) {
l = mid + 1;
} else {
h = mid – 1;
}
}
if (l == min4Length.size()) {
min4Length.push_back(array[i]);
} else {
if (min4Length[l] > array[i]) {
min4Length[l] = array[i];
}
}
printArray(min4Length);
}

return (int)min4Length.size();

}

vector longestNonDecreasingSubArray(vector &array) {
vector ret;
if (array.size() == 0) {
return ret;
}
vector min4Length;
vector<vector > pathes;
for (int i = 0; i < array.size(); ++i) {
int l = 0,h = (int)min4Length.size() – 1;
while (l <= h) {
int mid = (l + h) / 2;
if (min4Length[mid] <= array[i]) {
l = mid + 1;
} else {
h = mid – 1;
}
}
cout<<“step “<<i<<“, value “<<array[i]<<“, position “<<l<<“, in min4len: “;
printArray(min4Length);
if (l == min4Length.size()) {
min4Length.push_back(array[i]);
vector path;
if (l > 0) {
vector &prePath = pathes[l – 1];
for (int k = 0; k < prePath.size(); ++k) {
path.push_back(prePath[k]);
}
}
path.push_back(i);
pathes.push_back(path);
cout<<“new length, min4len: “;
printArray(min4Length);
cout<<” path: “;
printArray(path);
} else {
if (min4Length[l] > array[i]) {
min4Length[l] = array[i];
if (l > 0) {
vector &prePath = pathes[l – 1];
for (int k = 0; k < prePath.size(); ++k) {
pathes[l][k] = prePath[k];
}
}
pathes[l].back() = i;
cout<<“path for “<<l<<” is:”;
printArray(pathes[l]);
}
cout<<“old for length: “< &lpath = pathes.back();
for (int i = 0; i < lpath.size(); ++i) {
ret.push_back(array[lpath[i]]);
}
return ret;
}

int main(int argc, const char * argv[])
{
vector arr = {1,2,3,-1,0,1,2,3,4,-1};
// int longest = lengthOfLongestNonDecreasingSubArray(arr);
// cout<<longest<<endl;
vector larr = longestNonDecreasingSubArray(arr);
printArray(larr);
cout<<“length : “<<larr.size()<<endl;
return 0;
}

Algoritm Problem: Add Two Unsigned Integers

To add two unsigned integers, A and B, we can exam a binary add by hand:
Let A = 5, B = 3, that is:

 0101 A
+0011 B
-------
 1000

We can notice that, if we ignore the overflow in some digital, that will be:

 0101 A
+0011 B
-------
 0110 C

Where C just is A^B.

If we consider only the overflow, we can get:

 0101 A
+0011 B
-------
 0001 D

The equation will be right: A + B = C + D << 1. And the D is just A&B
So we can conclude that A + B = (A^B) + (A&B)<<1.
When we recursively doing this, until B is zero, or the most significant bit of B is 1(in this case, A+B exceeds the bounds of unsigned int).



Codes


//
//  main.cpp
//  AddTwoUnsingedInteger
//
//  Created by Qiu Xiangyu on 12-12-21.
//  Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

#include 
using namespace std;

unsigned int add(unsigned int i0, unsigned int i1) {
    static const unsigned int hMask = 0x1 << (sizeof(unsigned int) * 8 - 1);
    while (i1 != 0) {
        unsigned int base = i0 ^ i1;
        unsigned int pro = i0 & i1;
        if (pro & hMask) {
            //over flow
            return 0;
        }
        i0 = base;
        i1 = pro << 1;
    }
    return i0;
}

int main(int argc, const char * argv[])
{

    // insert code here...
    std::cout << "Hello, World!\n";
    unsigned int uMax = ~0;
    cout<< add(uMax - 1,2);
    return 0;
}

Algorithm Problems: Two Sum

Manage two pointer to the array, one from the front, moving forward, the other from the rear, moving backward.
When the summation of the value pointed by the two pointer is equal to the given sum, then one solution is found;
Else if the summation is lower than the given sum, moving the lower pointer forward by 1 step;
Otherwise, moving the higher pointer backward by 1 step.
Doing this until the lower pointer meet the higher pointer.

Then the time complexity is O(n), space complexity is O(1).



Problem
Given an sorted integer array, ascending order, and a given number, find all the pairs in the array that sum to the given number.



Codes

//
//  main.cpp
//  TwoSum
//
//  Created by Qiu Xiangyu on 12-12-20.
//  Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

#include <iostream>
#include <vector>
using namespace std;

vector<vector<int> > twoSum(vector<int> arr, int sum) {
    vector<vector<int> > ret;
    size_t low = 0, hi = arr.size() - 1;
    while (low < hi) {
        int vlow = arr[low];
        int vhi = arr[hi];
        int cursum = vlow + vhi;
        if (cursum == sum) {
            vector<int> sol(2,vlow);
            sol[1] = vhi;
            ret.push_back(sol);
            ++low;
        } else if (cursum < sum) {
            ++low;
        } else {
            --hi;
        }
    }
    return ret;
}

int main(int argc, const char * argv[])
{

    // insert code here...
    vector<int> arr = {1,2,3,4,5,6,7,8,9,10};
    vector<vector<int>> ret = twoSum(arr, 11);
    for (int i = 0; i < ret.size(); ++i) {
        vector<int> solution = ret[i];
        cout<<solution[0]<<" , "<<solution[1]<<endl;
    }
    cout<<"total : "<<ret.size()<<endl;
    return 0;
}

Algorithm Problem:Swap the left and right sub-tree in a binary tree without recursion

Just swap the tree nodes’ left and right child, in the post traversal order.
只需要在后序遍历的过程中,交换节点的左右子树即可。



Code

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    void swapLeftAndRight(TreeNode *root) {
        bool forward = true;
        TreeNode *pre = NULL;
        stack trace;
        if(root) trace.push(root);
        
        while(trace.size()) {
            TreeNode *cur = trace.top();
            if(forward) {
                if(cur->left && pre != cur->left) {
                    trace.push(cur->left);
                } else {
                    forward = false;
                }
            } else {
                if(cur->right && pre != cur->right) {
                    trace.push(cur->right);
                    forward = true;
                } else {
                    //all the child has finished visit, then swap the left and right of cur
                    TreeNode *t = cur->left;
                    cur->left = cur->right;
                    cur->right = t;
                    
                    trace.pop();
                }
            }
            pre = cur;
        }
    }
};

Algorithm Problem:Intersection of Sorted Array

Binary search every element of arr1 in arr0, if find, append it to the output. And at each step, shrink the search range.



Intersection of Sorted Array
Find out the intersection of two sorted array



Code

<

pre>

//
// main.cpp
// Intersection of two Sorted Array
//
// Created by Qiu Xiangyu on 12-12-16.
// Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//
// Find out the intersection of two sorted array.

include

include

using namespace std;

vector getIntersection(vector &arr0, vector &arr1) {
vector ret;
if (arr0.size() == 0 || arr1.size() == 0) {
return ret;
}

size_t low = 0;
for (size_t i1 = 0; i1 < arr1.size(); ++i1) {
    int v1 = arr1[i1];
    //1.find the arr1[0] in arr0 using binary search
    size_t i0 = -1;//not found
    {
        size_t l = low,h = arr0.size() - 1;
        while (l <= h) {
            size_t m = (l + h) /2;
            if (arr0[m] < v1) {
                l = m + 1;
            } else if (arr0[m] > v1) {
                h = m - 1;
            } else {
                i0 = m;
                break;
            }
        }
    }
    if (i0 == -1) {
        continue;
    } else {
        //2.in case there is repeat values in arr0, move i0 towards 0 while arr0[i0] == arr1[0]
        while (i0 > low && arr0[i0-1] == arr1[0]) {
            --i0;
        }
        low = i0 + 1;
        ret.push_back(v1);
    }
}

return ret;

}

int main(int argc, const char * argv[])
{

vector<int> arr0 = {0,1,2,3,4,5,5,6,7,8,9,10};
vector<int> arr1 = {5,5,7,9};
vector<int> inte = getIntersection(arr0, arr1);
for (int i = 0; i < inte.size(); ++i) {
    cout<<inte[i]<<",";
}
cout<<endl;
return 0;

}

LeetCode Problem:Flatten Binary Tree to Linked List

We can notice that in the flattened tree, each sub node is the successor node of it’s parent node in the pre-order of the original tree. So, we can do it in recursive manner, following the steps below:
1.if root is NULL return;
2.flatten the left sub tree of root, if there is left sub-tree;
3.flatten the right sub-tree of root, if has;
4.if root has no left sub-tree, then root is flattened already, just return;
5.we need to merge the left sub-tree with the right sub-tree, by concatenate the right sub-tree to the last node in left sub-tree.
5.1.find the last node in the left sub tree, as the left is flattened, this is easy.
5.2.concatenate the right sub-tree to this node’s right child.
5.3.move the left sub-tree to the right for root.
5.4.clear the left child of root.
6.done.

<br>
Flatten Binary Tree to Linked List
Given a binary tree, flatten it to a linked list in-place.
For example,
Given

         1
        / \
       2   5
      / \   \
     3   4   6

The flattened tree should look like:

   1
    \
     2
      \
       3
        \
         4
          \
           5
            \
             6



Code:48ms to accept with large test set.

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    void flatten(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if (root == NULL) return;
        //1.flat the left subtree
        if (root->left)
            flatten(root->left);
        //2.flatten the right subtree
        if (root->right)
            flatten(root->right);
        //3.if no left return
        if (NULL == root->left)
            return;
        //4.insert left sub tree between root and root->right
        //4.1.find the last node in left
        TreeNode ** ptn = & (root->left->right);
        while (*ptn)
            ptn = & ((*ptn)->right);
        //4.2.connect right sub tree after left sub tree
        *ptn = root->right;
        //4.3.move left sub tree to the root's right sub tree
        root->right = root->left;
        root->left = NULL;
        
    }
};



Code rewrite at 2013-2-20, non-recursive version

class Solution {
public:
    void flatten(TreeNode *root) {
        bool f = true;
        stack t;
        TreeNode *pre = NULL;
        if(root) {
            t.push(root);
        }
        
        while(t.size()) {
            TreeNode *cur = t.top();
            if(f) {
                if(cur->left && cur->left != pre) {
                    t.push(cur->left);
                } else {
                    f = false;
                }
            } else {
                if(cur->right && cur->right != pre) {
                    t.push(cur->right);
                    f = true;
                } else {
                    t.pop();
                    //at this time, the sub-tree of cur is flattened, so just flatten the cur
                    TreeNode *left = cur->left;
                    if(left) {
                        TreeNode *lastLeft = left;
                        while(lastLeft->right) {
                            lastLeft = lastLeft->right;
                        }
                        lastLeft->right = cur->right;
                        cur->right = left;
                        cur->left = NULL;
                    }
                }
            }
            pre = cur;
        }
    }
};

Algorithm Sqrt for Double

The binary divide is straight forward solution, but some times slow.

The Newtown solution is better, but must careful for the mathematic detail. The equation is:
xk+1 = xk – (F(xk) / F'(xk) )

PS:slope(斜率), first derivative(一阶导数)



code

<

pre>
//
// main.cpp
// Sqrt
//
// Created by Qiu Xiangyu on 12-12-16.
// Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

include

include <math.h>

using namespace std;

//max error to guide the algorithms
const double _err = 0.000001;

//binary divide version
double mySqrtBinary(double num) {
if (num < 0) {
return NAN;
} else if (num == 0) {
return 0;
}
double sqmax = num > 1 ? num : 1.0;
double sqmin = num > 1 ? 1.0 : num;
double s = 0.5 * (sqmax + sqmin);
while (true) {
double ss = s * s;
if (fabs(ss – num) < _err) {
break;
}
if (ss > num) {
sqmax = s;
} else {
sqmin = s;
}
s = 0.5 * (sqmax + sqmin);
}
return s;
}

//newtown
double mySqrtNewtown(double num) {
if (num < 0) {
return NAN;
}
double s = num;
while (true) {
double ss = s * s;
double f0 = ss – num;
if (fabs(f0) < _err) {
break;
}
cout<<s<<endl;
s = s – f0 / (2.0 * s);
}
return s;
}

int main(int argc, const char * argv[])
{
// insert code here…
std::cout << mySqrtNewtown(0);
return 0;
}

Algorithm Problem: Coins,Integer Bag Problem using Dynamic Programming

We can calculate the result by dynamic programming.

When there is only one coin valued 1, there is only one way to sum up to the money.
How about the coin valued 2 is available ? We can use the coin2 by count of 0,1,…,Sum/2, and using coin1 for the rest of money which is already calculated by the former step.
Generally, when we already calculated the ways with using the first k types of coins for the money 0,1,…,Sum. When the k+1 types of coin is available, the ways for a certain sum of money tSum is the sum ways of using 0 of new coins, 1 of new coins, 2 of new coins,… and handle the rest of money by first k types of coins, while the rest of money is none-negative.

Then we can get the code below.



Coins or Bags
There are some coins valued 1,2,5,20,50,100. Given a sum of money Sum, how many ways are there by using these coins to sum up to the given money.
The classic integer bag problem is similar to this problem.



Codes:Recur version and Dynamic version

<

pre>

//
// main.cpp
// 背包问题
//
// Created by Qiu Xiangyu on 12-12-15.
// Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

// 类似背包问题
// 有一些面值的硬币,1 2 5 20 50 100,要求给出凑足某个数目的钱,总共多少方法

include

using namespace std;

static const int types = 6;
const int coins[types] = {1,2,5,20,50,100};
int waysRecur(int total, int maxAllowType) {
if (maxAllowType <= 0) {
return 1;
}
int maxCoin = coins[maxAllowType];
int ways = 0;
for (int i = 0; i <= total / maxCoin; ++i) {
ways += waysRecur(total – i * maxCoin, maxAllowType – 1);
}
return ways;
}

int waysDp(int sum) {
if (sum <= 0) {
return 1;
}
//1.build a 2-d array to hold the results
int * ways[types];
for (int it = 0 ; it < types; ++it) {
ways[it] = new int[sum + 1];
}
for (int is = 0; is <= sum; ++is) {
ways[0][is] = 1;
}
//2.dp from types 1 to types – 1 to calculate the ways
for (int it = 1; it < types; ++it) {
int coinValue = coins[it];
for (int is = 0; is <= sum; ++is) {
ways[it][is] = 0;
for (int itime = 0; itime * coinValue <= is; ++itime) {
int remain = is – itime * coinValue;
ways[it][is] += ways[it-1][remain];
}
}
}
//3.return the result
int ret = ways[types – 1][sum];
for (int it = 0 ; it < types; ++it) {
delete ways[it];
}
return ret;
}

int main(int argc, const char * argv[])
{
int total = 111;
int ways = waysRecur(total, types – 1);
int wdp = waysDp(total);
cout<<“ways from recur:”<<ways<<endl;
cout<<“ways from dp:”<<wdp<<endl;
return 0;

/*
 5:{1,1,1,1,1},{1,1,1,2},{1,2,2},{5}
 it=0:{1,1,1,1,1}
 it=1:{1,1,2,2,3}
 it=2:{1,1,2,2,4}
 */

}

LeetCode Problem:Distinct Subsequences

(2013-1-5更新了动态规划版本,见下面)
这题有点复杂,一开始拿到都不知道怎么下手。

尝试的路径是:
1·先试试看从T的size上简化看看。写一个例子S = aaaaa,T = a,很简单,数数就行。然后T=aa,发现没有太好的方法,怎么弄也算不出来最后结果。但是如果第一步查找记录下了所有的位置的话,只需要看该位置后面还有没出现T[1]就好了。比如S=aaba,T=ab,那么第一步查找得到{0,1,3},第二步只需要在这个集合内查看,S[0]后面有没有b,S[1]后面有没有b,S[3]后面有没有b。最后得到{02,12}两个结果。这样有了一个初步的算法,只不过非常消耗内存。小集合可以过,大集合内存超过限制。
2·内存如何超限的呢?比如当S=aaaa,T=aaa,的时候,第一轮结果{0,1,2,3},第二轮就变成了{01,02,03,12,13,23},可以看到,如果S再长一点的话,是很恐怖的。但是实际上从这里也能看出一点问题,就是02,12是可以合并的,因为下次它两个都会去查找S[2]之后有没有T[2],而且得到的结果是一样的。如果合并的话,不仅空间省了,时间也省了。所以就可以用一个int数组,记录在当前时刻(查找过程中T的下标),在S的某个位置结束的成功匹配个数。比如这个数组叫做matches,和S等大。那么第一轮下来matches[i] = (S[i] == T[0])。第二轮开始,针对每一个j = 1 … T.size() – 1,对matches中记录的每一个结束点去往后查找T[j],找到的话(比如在S[ii] == T[j]),那么新的matches[ii] += matches[i]。所以这里需要两个数组来回倒腾。最后写出代码220ms过了大集合测试。空间复杂度O(S.size()),时间复杂度O(S.size() * S.size() * T.size())。而且在查找的时候还可以简化,这样整体时间复杂度可以变成O(S.size() * T.size())



Distinct Subsequences
Given a string S and a string T, count the number of distinct subsequences of T in S.
A subsequence of a string is a new string which is formed from the original string by deleting some (can be none) of the characters without disturbing the relative positions of the remaining characters. (ie, “ACE” is a subsequence of “ABCDE” while “AEC” is not).
Here is an example:
S = “rabbbit”, T = “rabbit”
Return 3.



代码:220ms过大集合

class Solution {
public:
    int numDistinct(string S, string T) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if (T.size() == 0 || S.size() == 0 || S.size() < T.size())
            return 0;
        
        //at each position i in S, how many matches could be found with the substring end at S[i]
        int *matches = new int[S.size()];
        //temporary array for updating matches.
        int *mtemp = new int[S.size()];

        //1.initialize
        for(int i = 0; i < S.size(); ++i) {
            matches[i] = (S[i] == T[0] ? 1 : 0);
            mtemp[i] = 0;
        }
        
        for(int j = 1; j < T.size(); ++j) {
            const char tj = T[j];//process the char T[j]
            for(int ilast = 0; ilast < S.size(); ++ilast) {
                if (matches[ilast] == 0)//no possible matches
                    continue;
                for(int i = ilast + 1; i < S.size(); ++i) {
                    if(S[i] == tj) {
                        mtemp[i] += matches[ilast];//and the match count in ilast to new location
                    }
                }
            }
            for(int ilast = 0; ilast < S.size(); ++ilast) {
                matches[ilast] = 0;
            }
            int * t = matches;//switch matches and mtemp
            matches = mtemp;
            mtemp = t;
        }
        
        int sum = 0;
        for(int i = 0; i < S.size(); ++i) {
            sum += matches[i];
        }
        return sum;
    }
};



代码1:小集合可以过,大集合内存超过

class Solution {
public:
    int numDistinct(string S, string T) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if (T.size() == 0 || S.size() == 0)
            return 0;
        queue pls;//possible location start index in S
        
        //1.inite pls by searchin T[0] in S
        for(int i = 0; i < S.size(); ++i) {
            if (S[i] == T[0]) {
                pls.push(i);
            }
        }
        //2.one by one search T[j] in S, update the loast match index in pls or remove if no match fould
        const int levelEnd = -1;
        int j = 1;
        if(pls.size() && j < T.size()) pls.push(levelEnd);
        while(pls.size() && j < T.size()) {
            if(pls.size() > 1000) return 1000;
            char tj = T[j];
            int lasti = pls.front();
            pls.pop();
            if (lasti == levelEnd) {
                ++j;
                if (j == T.size()) 
                    break;
                if(pls.size()) pls.push(levelEnd);
            } else {
                for(int i = lasti + 1; i < S.size(); ++i) {
                    if(S[i] == tj) {
                        pls.push(i);
                    }
                }
            }
        }
        
        //3.finished
        return pls.size();
    }
};

<br>
Code rewrite at 2013-1-5
重写一次,用了动态规划搞定,时间复杂度是O(S.size() * T.size())

class Solution {
public:
    int numDistinct(string S, string T) {
        if(S.size() < T.size()) return 0;
        if(T.size() == 0) return 0;
        vector > ways (S.size(), vector(T.size(),0));
        //the first column
        ways[0][0] = (S[0] == T[0] ? 1 : 0);
        for(int is = 1; is < S.size(); ++is) {
            ways[is][0] = ways[is-1][0];
            if(T[0] == S[is]) {
                ways[is][0] += 1;
            }
        }
        //the remaining triangle
        for(int it = 1; it < T.size(); ++it) {
            //the item on the diagonal
            ways[it][it] = (S[it] == T[it] ? ways[it-1][it-1] : 0);
            //the items below
            for(int is = it + 1; is < S.size(); ++is) {
                ways[is][it] = ways[is-1][it];
                if(S[is] == T[it]) {
                    ways[is][it] += ways[is-1][it-1];
                }
            }
        }
        return ways[S.size()-1][T.size()-1];
    }
};



上面的代码用了O(S.size() * T.size())的空间,实际上没有必要,只需要2 * S.size()的空间就够了。

class Solution {
public:
    int numDistinct(string S, string T) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if(S.size() < T.size()) return 0;
        if(T.size() == 0) return 0;
        int *ways = new int[S.size()];
        int *waysTemp = new int[S.size()];
        //the first charactor for T
        ways[0] = (S[0] == T[0] ? 1 : 0);
        for(int is = 1; is < S.size(); ++is) {
            ways[is] = ways[is-1];
            if(T[0] == S[is]) {
                ways[is] += 1;
            }
        }
        //the remaining charactors in T
        for(int it = 1; it < T.size(); ++it) {
            //the item on the diagonal
            waysTemp[it] = (S[it] == T[it] ? ways[it-1] : 0);
            //the items below
            for(int is = it + 1; is < S.size(); ++is) {
                waysTemp[is] = waysTemp[is-1];
                if(S[is] == T[it]) {
                    waysTemp[is] += ways[is-1];
                }
            }
            int *temp = ways;
            ways = waysTemp;
            waysTemp = temp;
        }
        int ret = ways[S.size()-1];
        delete ways;
        delete waysTemp;
        return ret;
    }
};



code rewrite at 2013-1-14, 36ms pass the large test

/*
Let ways(x,y) denote that from first x characters in S to first y characters in T needs ways(x,y) distinct ways.
then if we knew ways(i,j), i < n, i < m, then 
ways(n,m) = S[n-1] == T[m-1] ? ways(n-1,m-1) + ways(n-1,m) : ways(n-1,m)
ways(x,0) = x; x = 0,...,S.size()
ways(y,y) = S[y-1] == T[y-1] ? ways(y-1,y-1) : 0; y = 1,...,T.size()
*/
class Solution {
public:
    int numDistinct(string S, string T) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if(S.size() < T.size()) return 0;
        int *ways = new int[S.size() + 1];
        int *waystemp = new int[S.size() + 1];
        for(int i = 0; i <= S.size(); ++i) {
            ways[i] = 1;
        }
        for(int lent = 1; lent <= T.size(); ++lent) {
            waystemp[lent] = (S[lent-1] == T[lent-1] ? ways[lent-1] : 0);
            for(int lens = lent + 1; lens <= S.size(); ++ lens) {
                waystemp[lens] = waystemp[lens - 1];
                waystemp[lens] += (S[lens-1] == T[lent-1] ? ways[lens-1] : 0);
            }
            int *temp = ways;
            ways = waystemp;
            waystemp = temp;
        }
        int ret = ways[S.size()];
        delete ways;
        delete waystemp;
        return ret;
    }
};

题目:Max Ascending

问题比较简单,就是一个数组a[n] 求max(ai-aj), i<j.

从前往后扫描,记录下当前的最小值,每次计算aj – ai,然后更新这个最大值。扫描完就得到了结果,时间复杂度O(n)

<

pre>
//
// main.cpp
// MaxAscend
//
// Created by Qiu Xiangyu on 12-12-14.
// Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

//一个数组a[n] 求max(ai-aj), i<j

include

include

using namespace std;
int maxAscend(vector nums) {
if (nums.size() <= 1) {
return 0;
}
int ret = nums[1] – nums[0];//the result
int vmin = nums[0];//track the current min
for (int i = 1; i < nums.size(); ++i) {
int v = nums[i];
if (vmin > v) {
vmin = v;
}
if (ret < v – vmin) {
ret = v – vmin;
}
}
return ret;
}

int main(int argc, const char * argv[])
{
vector testarr = {1,4,2,5,7,0};
int md = maxAscend(testarr);
cout<<“Max difference is “<<md<<endl;
return 0;
}

LeetCode题目:Convert Sorted List to Binary Search Tree

问题比上一个难一点,如果是可以用O(n)空间的话,倒是可以开一个数组放上这n个数,然后按上一个问题的解法去做。总体时间O(n),空间O(n)。

不过这题这样出应该不是这个意思。假如不用O(n)空间的话:
可以先数一下一共有多少个节点。O(n)时间。
然后去找到根节点(数数),然后根节点之前是左子树,之后是右子树,并且两个子树的节点个数都是可以直接计算出来的,递归解这个问题就可以了。
那么总体时间复杂度是O(n*logn),空间O(logn)



Convert Sorted List to Binary Search Tree
Given a singly linked list where elements are sorted in ascending order, convert it to a height balanced BST.



代码:114ms过大集合

/**
 * Definition for singly-linked list.
 * struct ListNode {
 *     int val;
 *     ListNode *next;
 *     ListNode(int x) : val(x), next(NULL) {}
 * };
 */
/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
    TreeNode *formTree(ListNode *head, int count) {
        if(count <= 0) return NULL;
        int rootIndex = count / 2;
        ListNode *rootNode = head;
        for(int ir = 0; ir < rootIndex; ++ir) {
            rootNode = rootNode->next;
        }
        TreeNode *root = new TreeNode(rootNode->val);
        root->left = formTree(head,rootIndex);
        root->right = formTree(rootNode->next, count - rootIndex - 1);
        return root;
    }
public:
    TreeNode *sortedListToBST(ListNode *head) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        
        //1. find the node count, takes O(n) time
        int nodeCount = 0;
        for(ListNode *cur = head; cur != NULL; cur = cur->next) {
            ++nodeCount;
        }
        
        //2. form the tree with the middle as the root
        return formTree(head, nodeCount);
    }
};

LeetCode题目:Convert Sorted Array to Binary Search Tree

对排序的数组进行二分查找样式的遍历就行了。



Convert Sorted Array to Binary Search Tree
Given an array where elements are sorted in ascending order, convert it to a height balanced BST.



代码:84ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
    TreeNode *sortedArrayToBST(vector &num,int si, int ei) {
        if(si > ei) return NULL;
        int mid = (ei + si) / 2;
        TreeNode *root = new TreeNode(num[mid]);
        root->left = sortedArrayToBST(num,si,mid - 1);
        root->right = sortedArrayToBST(num,mid + 1,ei);
        return root;
    }
    
public:
    TreeNode *sortedArrayToBST(vector &num) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        return sortedArrayToBST(num,0,num.size() - 1);
    }
};

LeetCode题目:Construct Binary Tree from Inorder and Postorder Traversal

这题和上一道题类似,也是首先在postorder中,最后那一个肯定是整棵树的根,然后在inorder中查找这个根,找到之后就能确定左子树和右子树的后序遍历和中序遍历,然后递归求解。



Construct Binary Tree from Inorder and Postorder Traversal
Given inorder and postorder traversal of a tree, construct the binary tree.
Note:
You may assume that duplicates do not exist in the tree.



代码:120ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    TreeNode *buildTree(vector &iorder, int isi, int iei, vector &porder, int psi, int pei) {
        if(iei - isi < 0 || iei - isi != pei - psi) {
            return NULL;
        }
        //the porder[pei] is the root of this tree
        TreeNode *root = new TreeNode(porder[pei]);
        //find the root in the iorder to seperate it into left sub tree and right sub tree
        int riii = -1;//root index in inorder array
        for(int i = isi; i <= iei; ++i) {
            if(iorder[i] == root->val) {
                riii = i;
                break;
            }
        }
        if(riii == -1) return root;//error
        int lnodes = riii - isi;
        //for the left sub tree
        //the isi to riii - 1 in inorder array will be it's inorder traversal
        //and the psi to psi + lnodes - 1 in postorder array will be it's post order traversal
        root->left = buildTree(iorder, isi, riii - 1, porder, psi, psi + lnodes - 1);
        //for the right sub tree is similary to the left
        root->right = buildTree(iorder, riii + 1, iei, porder, psi + lnodes, pei - 1);
        return root;
    }
    TreeNode *buildTree(vector &inorder, vector &postorder) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        return buildTree(inorder, 0, inorder.size() - 1, postorder, 0, postorder.size() -1);
    }
};

LeetCode题目:Construct Binary Tree from Preorder and Inorder Traversal

这曾经是一道很难的题目,看来这一阵的锻炼确实是有效果的。


方法是,画一棵树出来,比如:

   1
 /   \
2     3
 \    /
  4  5

这棵树好用,在于其中包含了所有的可能性,(2度,1度(左右),0度节点都有)。
它的preorder是:1,2,4,3,5
他的inorder是:2,4,1,5,3


那么递归的看这个问题,首先preorder的第一个必然是根(1),然后此节点在inorder中的下标是2,那么在inorder中,处于1之前的两个节点2,4是左子树的;反之5,3是右子树的。
针对左子树,2,4就是它的inorder,而在preorder中,除开第一个根,数两个节点的子序列正好是2,4,这是左子树的preorder。这样这个问题就自然变成递归了:
即,其左子树的preorder是(2,4),inorder是(2,4);类似有右子树preorder(3,5),inorder(5,3)。
然后边界情况是某数的preorder和inorder遍历的长度都是1,比如{x},那么这棵树就是以x为值的节点。
这样就可以写出代码了。



Construct Binary Tree from Preorder and Inorder Traversal
Given preorder and inorder traversal of a tree, construct the binary tree.
Note:
You may assume that duplicates do not exist in the tree.



代码:124ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
typedef TreeNode TN;
class Solution {
    TN *buildTree(vector &preorder,int psi,int pei,
                  vector &inorder, int isi, int iei) {
        if(pei - psi < 0 || pei - psi != iei - isi) 
            return NULL;
        //root of sub tree
        TN *root = new TN(preorder[psi]);
        //find this value in inorder to locate the root in inorder
        int riii = -1;//root index in inorder
        for(int itemp = isi; itemp <= iei; ++ itemp) {
            if(inorder[itemp] == root->val) {
                riii = itemp;
                break;
            }
        }
        if(riii != -1) {
            //calculate the nodes count in left tree
            int leftCount = riii - isi;
            TN *left = buildTree(preorder,psi + 1, psi + leftCount, inorder, isi, riii - 1);
            root->left = left;
            TN *right = buildTree(preorder,psi + leftCount + 1,pei, inorder, riii + 1, iei);
            root->right = right;
        }
        return root;
    }
public:
    TreeNode *buildTree(vector &preorder, vector &inorder) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        TN *root = buildTree(preorder,0,preorder.size() - 1,inorder,0,inorder.size() - 1);
    }
};

LeetCode题目:Binary Tree Zigzag Level Order Traversal

比较简单,广度优先变一下就可以了,用一个bool记录是从左到右还是从右到左,每一层结束就翻转一下。



Binary Tree Zigzag Level Order Traversal
Given a binary tree, return the zigzag level order traversal of its nodes’ values. (ie, from left to right, then right to left for the next level and alternate between).
For example:
Given binary tree {3,9,20,#,#,15,7},

    3
   / \
  9  20
    /  \
   15   7

return its zigzag level order traversal as:

[
  [3],
  [20,9],
  [15,7]
]



代码:28ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    vector > zigzagLevelOrder(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        vector > ret;
        if(NULL == root) return ret;
        
        queue que;
        que.push(root);
        que.push(NULL);//level end point
        
        bool l2r = true;//left to right
        
        vector level;
        while(true) {
            TreeNode *cur = que.front();
            que.pop();
            if(cur) {
                level.push_back(cur->val);
                if(cur->left) que.push(cur->left);
                if(cur->right) que.push(cur->right);
            } else {
                if(l2r) {
                    ret.push_back(level);
                } else {
                    vector temp;
                    for(int i = level.size() - 1 ; i >= 0; --i) {
                        temp.push_back(level[i]);
                    }
                    ret.push_back(temp);
                }
                level.erase(level.begin(),level.end());
                l2r = !l2r;
                
                if(que.size() == 0) break;
                que.push(NULL);
            }
        }
        
        return ret;
    }
};

LeetCode题目:Binary Tree Maximum Path Sum

这道题好难,因为在一颗二叉树中有多种情况,这条最长的路径有可能是从某个中间节点直接到某个叶子节点的,也有可能是两条上述路径连接在一个中间节点上(这个中间节点可能是root也可能不是),也有可能只是一个单独的节点。

情况太多导致问题很复杂,不好分析,这题花了我至少两个小时去解决。

一开始从自上而下的方法去想,想到了可以计算出每一个节点到root的最大值,然后最后拼一下最大值,得到整体最大。但是这里假设了路径都是过root的。显然不对。

然后想level优先遍历的话,如果知道了上面的情况,下面一层可解么?陷入死胡同至少30min,gg。

然后想自下而上,如果知道了左右子树的情况,自身的情况能得到么?很快就有了肯定回答。
但是这个solution也相对复杂,针对每个节点需要知道的是:
1.终止在这个节点上(往自己子树走)的最大路径值是多少
2.经过这个节点的最大值是多少?(从左子树走过自己到右子树)
3.不经过此节点的子树中可能获得的最大值是多少?

在知道这三个量之后,问题变得明确可解了。那就是递归,先计算出左右子树的这三个值,然后:
1.终止在此节点的最大路径,首先是自己的值包含进去,然后如果终止在左或右子树的根节点的最大路径值大于0的话,加上这个值。
2.经过这个节点的最大值,很简单了,左右子树的端点最大值加上自己的值。
3.不经过此节点的最大值,直接查看左右子树中的这个值(如果有左右子树的话),还有左右子树的端点最大值。

这样最后就写出一个递归的算法。如何写成循环还没想好。

2013-1-18,更新了一个更简单的办法.

Question: Binary Tree Maximum Path Sum

Given a binary tree, find the maximum path sum.
The path may start and end at any node in the tree.
For example:
Given the below binary tree,

       1
      / \
     2   3

Return 6.

代码:

248ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Maxes{
public:
    int tmax;//max of terminated in self
    int pmax;//max of pass self
    int nmax;//max of non-relative to self
    Maxes() {tmax = pmax = 0; nmax = (1 << (sizeof(int) * 8 - 1));}
    inline int getMax() {
        int m = tmax;
        if(m < pmax) m = pmax;
        if(m < nmax) m = nmax;
        return m;
    }
};
class Solution {
public:
    Maxes maxPath(TreeNode *root) {
        Maxes m;
        if(NULL == root)
            return m;

        Maxes l = maxPath(root->left);
        Maxes r = maxPath(root->right);

        //tmax is the max value which terminated at this node
        //when all of it's children is negative, this is it's value
        //or add the max value terminated at it's children
        m.tmax = max(l.tmax,r.tmax);
        if(m.tmax < 0) m.tmax = 0;
        m.tmax += root->val;

        //pmax is the max value which is pass this node
        //that is it's value terminated at it's children (if have, or zero), add self value
        m.pmax = l.tmax + r.tmax + root->val;

        //nmax is the max value which not including current node
        if(root->left)
            m.nmax = l.getMax();
        if(root->right) {
            int rmax = r.getMax();
            if(m.nmax < rmax) m.nmax = rmax;
        }
        return m;
    }
    int maxPathSum(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        Maxes m = maxPath(root);
        int ma = m.getMax();
        return ma;
    }
};

Code rewrite at 2013-1-18, 266ms pass large set, simpler

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
private:
    int _curMax;
    //return the max path ending in root
    //and refresh the _curMax with the path sum that go through from left to root to right child.
    int maxWithRoot(TreeNode *root) {
        if(NULL == root) return 0;
        int leftmax = maxWithRoot(root->left);
        int rightmax = maxWithRoot(root->right);

        //the max from left child to right child, accross from root
        int arcmax = root->val;
        if(leftmax > 0) arcmax += leftmax;
        if(rightmax > 0) arcmax += rightmax;
        if(_curMax < arcmax) _curMax = arcmax;

        //the max that end in root
        int pathmax = root->val;
        int submax = std::max(leftmax,rightmax);
        if(submax > 0) pathmax += submax;
        if(_curMax < pathmax) _curMax = pathmax;

        return pathmax;
    }
public:
    int maxPathSum(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        _curMax = INT_MIN;
        maxWithRoot(root);
        return _curMax;
    }
};

LeetCode题目:Binary Tree Level Order Traversal II

和上一题差不多,只不过最后输出的时候颠倒一下,另外:std::list才有push_front(),而std::vector没有这个方法。



Binary Tree Level Order Traversal II
Given a binary tree, return the bottom-up level order traversal of its nodes’ values. (ie, from left to right, level by level from leaf to root).
For example:
Given binary tree {3,9,20,#,#,15,7},

    3
   / \
  9  20
    /  \
   15   7

return its bottom-up level order traversal as:

[
  [15,7]
  [9,20],
  [3],
]



代码:40ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    vector > levelOrderBottom(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        list > retTemp;

        queue trace;
        trace.push(root);
        trace.push(NULL);
        
        vector curLevel;
        while(true) {
            TreeNode *cur = trace.front();
            trace.pop();
            if(cur) {
                curLevel.push_back(cur->val);
                if(cur->left) trace.push(cur->left);
                if(cur->right) trace.push(cur->right);
            } else {
                if(curLevel.size()) {
                    retTemp.push_front(curLevel);
                    curLevel.erase(curLevel.begin(),curLevel.end());
                    trace.push(NULL);
                } else {
                    break;
                }
            }
        }
        
        vector > ret;
        for(list >::iterator it = retTemp.begin(); it != retTemp.end(); ++it) {
            ret.push_back(*it);
        }
        return ret;
    }
};

LeetCode题目:Binary Tree Level Order Traversal

题目简单,经典方法。只不过这题需要每一个level单独作为一个vector输出,因此在每一个level结束的时候插入一个NULL标记。每当遇到这个标记,就说明一层已经完全访问了。



Binary Tree Level Order Traversal
Given a binary tree, return the level order traversal of its nodes’ values. (ie, from left to right, level by level).
For example:
Given binary tree {3,9,20,#,#,15,7},

    3
   / \
  9  20
    /  \
   15   7

return its level order traversal as:
[
[3],
[9,20],
[15,7]
]



代码:28ms过大集合

/**
 * Definition for binary tree
 * struct TreeNode {
 *     int val;
 *     TreeNode *left;
 *     TreeNode *right;
 *     TreeNode(int x) : val(x), left(NULL), right(NULL) {}
 * };
 */
class Solution {
public:
    vector > levelOrder(TreeNode *root) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        vector > ret;
        if(NULL == root) return ret;
        queue trace;
        trace.push(root);
        trace.push(NULL);
        vector levelVals;
        while(true) {
            TreeNode *cur = trace.front();
            trace.pop();
            if(NULL == cur) {
                ret.push_back(levelVals);
                levelVals.erase(levelVals.begin(),levelVals.end());
                if(trace.size())
                    trace.push(NULL);
                else
                    break;
            } else {
                levelVals.push_back(cur->val);
                if(cur->left) trace.push(cur->left);
                if(cur->right) trace.push(cur->right);
            }
        }
        return ret;
    }
};



Code rewrite at 2012-12-22, 24ms pass the large test set

class Solution {
public:
    vector > levelOrder(TreeNode *root) {
        vector > ret;
        
        queue q;
        if(root) {
            q.push(root);
            q.push(NULL);
        }
        
        vector level;
        while(q.size()) {
            TreeNode *cur = q.front();
            q.pop();
            if(cur) {
                level.push_back(cur->val);
                if(cur->left) q.push(cur->left);
                if(cur->right) q.push(cur->right);
            } else {
                ret.push_back(level);
                level.erase(level.begin(),level.end());
                if(q.size()) q.push(NULL);
            }
        }
        
        return ret;
    }
};

LeetCode题目:Best Time to Buy and Sell Stock III,一维动态规划

和前两道题比起来的话,这道题最难了,因为限制了交易次数。
解决问题的途径我想出来的是:既然最多只能完成两笔交易,而且交易之间没有重叠,那么就divide and conquer。
设i从0到n-1,那么针对每一个i,看看在prices的子序列[0,…,i][i,…,n-1]上分别取得的最大利润(第一题)即可。
这样初步一算,时间复杂度是O(n2)。


改进:
改进的方法就是动态规划了,那就是第一步扫描,先计算出子序列[0,…,i]中的最大利润,用一个数组保存下来,那么时间是O(n)。
第二步是逆向扫描,计算子序列[i,…,n-1]上的最大利润,这一步同时就能结合上一步的结果计算最终的最大利润了,这一步也是O(n)。
所以最后算法的复杂度就是O(n)的。



Best Time to Buy and Sell Stock III
Say you have an array for which the ith element is the price of a given stock on day i.
Design an algorithm to find the maximum profit. You may complete at most two transactions.
Note:
You may not engage in multiple transactions at the same time (ie, you must sell the stock before you buy again).



代码:12ms过大集合,一次性bug free,很happy

class Solution {
public:
    int maxProfit(vector &prices) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if(prices.size() <= 1)
            return 0;
            
        //stores the max profit in [0, ... , i] subarray in prices
        vector maxEndWith;
        {//build the maxEndWith.
            int lowest = prices[0];
            int maxprofit = 0;
            maxEndWith.push_back(0);
            for(int i = 1; i < prices.size(); ++i) {
                int profit = prices[i] - lowest;
                if(profit > maxprofit) {
                    maxprofit = profit;
                }
                maxEndWith.push_back(maxprofit);
                if(prices[i] < lowest) lowest = prices[i];
            }
        }
        
        int ret = maxEndWith[prices.size() - 1];
        {//reverse to see what is the maxprofit of [i, ... , n-1] subarray in prices
        //and meanwhile calculate the final result
            int highest = prices[prices.size() - 1];
            int maxprofit = 0;
            for(int i = prices.size() - 2; i >= 0; --i) {
                int profit = highest - prices[i];
                if(profit > maxprofit)  maxprofit = profit;
                int finalprofit = maxprofit + maxEndWith[i];
                if(finalprofit > ret) ret = finalprofit;
                if(prices[i] > highest) highest = prices[i];
            }
        }

        return ret;
    }
};

LeetCode题目:Best Time to Buy and Sell Stock II

这个更简单了,题目要求可以多次买卖,但是同一时间只能有一股在手里。
这样就可以在每次上升子序列之前买入,在上升子序列结束的时候卖出。相当于能够获得所有的上升子序列的收益。
而且,对于一个上升子序列,比如:5,1,2,3,4,0 中的1,2,3,4序列来说,对于两种操作方案:
一,在1买入,4卖出;
二,在1买入,2卖出同时买入,3卖出同时买入,4卖出;
这两种操作下,收益是一样的。


所以算法就是:从头到尾扫描prices,如果i比i-1大,那么price[i] – price[i-1]就可以计入最后的收益中。这样扫描一次O(n)就可以获得最大收益了。



Best Time to Buy and Sell Stock II
Say you have an array for which the ith element is the price of a given stock on day i.
Design an algorithm to find the maximum profit. You may complete as many transactions as you like (ie, buy one and sell one share of the stock multiple times). However, you may not engage in multiple transactions at the same time (ie, you must sell the stock before you buy again).



代码:44ms过大集合

class Solution {
public:
    int maxProfit(vector &prices) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        int p = 0;
        for(int i = 1; i < prices.size() ; ++i) {
            int delta = prices[i] - prices[i-1];
            if(delta > 0 ) {
                p += delta;
            }
        }
        return p;
    }
};

LeetCode题目:Best Time to Buy and Sell Stock

这个很简单,一次扫描完成。只需要找到最大增长即可。
从前往后,用当前价格减去此前最低价格,就是在当前点卖出股票能获得的最高利润。
扫描的过程中更新最大利润和最低价格就行了。
O(n)



Best Time to Buy and Sell Stock
Say you have an array for which the ith element is the price of a given stock on day i.
If you were only permitted to complete at most one transaction (ie, buy one and sell one share of the stock), design an algorithm to find the maximum profit.



代码:52ms过大集合

class Solution {
public:
    int maxProfit(vector &prices) {
        // Start typing your C/C++ solution below
        // DO NOT write int main() function
        if (prices.size() <= 1)
            return 0;
        int low = prices[0];
        int maxp = 0;
        for(int i = 1; i < prices.size(); ++i) {
            int profit = prices[i] - low;
            if(maxp < profit) maxp = profit;
            if(low > prices[i]) low = prices[i];
        }
        return maxp;
    }
};

Bit操作大全

转载自:http://graphics.stanford.edu/~seander/bithacks.html

 

Bit Twiddling Hacks

By Sean Eron Anderson
seander@cs.stanford.edu

Individually, the code snippets here are in the public domain (unless otherwise noted) — feel free to use them however you please. The aggregate collection and descriptions are © 1997-2005 Sean Eron Anderson. The code and descriptions are distributed in the hope that they will be useful, but WITHOUT ANY WARRANTY and without even the implied warranty of merchantability or fitness for a particular purpose. As of May 5, 2005, all the code has been tested thoroughly. Thousands of people have read it. Moreover, Professor Randal Bryant, the Dean of Computer Science at Carnegie Mellon University, has personally tested almost everything with his Uclid code verification system. What he hasn’t tested, I have checked against all possible inputs on a 32-bit machine. To the first person to inform me of a legitimate bug in the code, I’ll pay a bounty of US$10 (by check or Paypal). If directed to a charity, I’ll pay US$20.

 

 

Contents


About the operation counting methodology

When totaling the number of operations for algorithms here, any C operator is counted as one operation. Intermediate assignments, which need not be written to RAM, are not counted. Of course, this operation counting approach only serves as an approximation of the actual number of machine instructions and CPU time. All operations are assumed to take the same amount of time, which is not true in reality, but CPUs have been heading increasingly in this direction over time. There are many nuances that determine how fast a system will run a given sample of code, such as cache sizes, memory bandwidths, instruction sets, etc. In the end, benchmarking is the best way to determine whether one method is really faster than another, so consider the techniques below as possibilities to test on your target architecture.

 


Compute the sign of an integer

int v;      // we want to find the sign of v
int sign;   // the result goes here 

// CHAR_BIT is the number of bits per byte (normally 8).
sign = -(v < 0);  // if v < 0 then -1, else 0. 
// or, to avoid branching on CPUs with flag registers (IA32):
sign = -(int)((unsigned int)((int)v) >> (sizeof(int) * CHAR_BIT - 1));
// or, for one less instruction (but not portable):
sign = v >> (sizeof(int) * CHAR_BIT - 1);

The last expression above evaluates to sign = v >> 31 for 32-bit integers. This is one operation faster than the obvious way, sign = -(v < 0). This trick works because when signed integers are shifted right, the value of the far left bit is copied to the other bits. The far left bit is 1 when the value is negative and 0 otherwise; all 1 bits gives -1. Unfortunately, this behavior is architecture-specific.

Alternatively, if you prefer the result be either -1 or +1, then use:

sign = +1 | (v >> (sizeof(int) * CHAR_BIT - 1));  // if v < 0 then -1, else +1

On the other hand, if you prefer the result be either -1, 0, or +1, then use:

sign = (v != 0) | -(int)((unsigned int)((int)v) >> (sizeof(int) * CHAR_BIT - 1));
// Or, for more speed but less portability:
sign = (v != 0) | (v >> (sizeof(int) * CHAR_BIT - 1));  // -1, 0, or +1
// Or, for portability, brevity, and (perhaps) speed:
sign = (v > 0) - (v < 0); // -1, 0, or +1

If instead you want to know if something is non-negative, resulting in +1 or else 0, then use:

sign = 1 ^ ((unsigned int)v >> (sizeof(int) * CHAR_BIT - 1)); // if v < 0 then 0, else 1

Caveat: On March 7, 2003, Angus Duggan pointed out that the 1989 ANSI C specification leaves the result of signed right-shift implementation-defined, so on some systems this hack might not work. For greater portability, Toby Speight suggested on September 28, 2005 that CHAR_BIT be used here and throughout rather than assuming bytes were 8 bits long. Angus recommended the more portable versions above, involving casting on March 4, 2006. Rohit Garg suggested the version for non-negative integers on September 12, 2009.

 


Detect if two integers have opposite signs

int x, y;               // input values to compare signs

bool f = ((x ^ y) < 0); // true iff x and y have opposite signs

Manfred Weis suggested I add this entry on November 26, 2009.

 


Compute the integer absolute value (abs) without branching

int v;           // we want to find the absolute value of v
unsigned int r;  // the result goes here 
int const mask = v >> sizeof(int) * CHAR_BIT - 1;

r = (v + mask) ^ mask;

Patented variation:

r = (v ^ mask) - mask;

Some CPUs don’t have an integer absolute value instruction (or the compiler fails to use them). On machines where branching is expensive, the above expression can be faster than the obvious approach, r = (v < 0) ? -(unsigned)v : v, even though the number of operations is the same.

On March 7, 2003, Angus Duggan pointed out that the 1989 ANSI C specification leaves the result of signed right-shift implementation-defined, so on some systems this hack might not work. I’ve read that ANSI C does not require values to be represented as two’s complement, so it may not work for that reason as well (on a diminishingly small number of old machines that still use one’s complement). On March 14, 2004, Keith H. Duggar sent me the patented variation above; it is superior to the one I initially came up with, r=(+1|(v>>(sizeof(int)CHAR_BIT-1)))v, because a multiply is not used. Unfortunately, this method has beenpatented in the USA on June 6, 2000 by Vladimir Yu Volkonsky and assigned to Sun Microsystems. On August 13, 2006, Yuriy Kaminskiy told me that the patent is likely invalid because the method was published well before the patent was even filed, such as in How to Optimize for the Pentium Processor by Agner Fog, dated November, 9, 1996. Yuriy also mentioned that this document was translated to Russian in 1997, which Vladimir could have read. Moreover, the Internet Archive also has an old link to it. On January 30, 2007, Peter Kankowski shared with me an abs version he discovered that was inspired by Microsoft’s Visual C++ compiler output. It is featured here as the primary solution. On December 6, 2007, Hai Jin complained that the result was signed, so when computing the abs of the most negative value, it was still negative. On April 15, 2008 Andrew Shapira pointed out that the obvious approach could overflow, as it lacked an (unsigned) cast then; for maximum portability he suggested (v < 0) ? (1 + ((unsigned)(-1-v))) : (unsigned)v. But citing the ISO C99 spec on July 9, 2008, Vincent Lefèvre convinced me to remove it becasue even on non-2s-complement machines -(unsigned)v will do the right thing. The evaluation of -(unsigned)v first converts the negative value of v to an unsigned by adding 2N, yielding a 2s complement representation of v’s value that I’ll call U. Then, U is negated, giving the desired result, -U = 0 – U = 2N – U = 2N – (v+2N) = -v = abs(v).

 


Compute the minimum (min) or maximum (max) of two integers without branching

int x;  // we want to find the minimum of x and y
int y;   
int r;  // the result goes here 

r = y ^ ((x ^ y) & -(x < y)); // min(x, y)

On some rare machines where branching is very expensive and no condition move instructions exist, the above expression might be faster than the obvious approach, r = (x < y) ? x : y, even though it involves two more instructions. (Typically, the obvious approach is best, though.) It works because if x < y, then -(x < y) will be all ones, so r = y ^ (x ^ y) & ~0 = y ^ x ^ y = x. Otherwise, if x >= y, then -(x < y) will be all zeros, so r = y ^ ((x ^ y) & 0) = y. On some machines, evaluating (x < y) as 0 or 1 requires a branch instruction, so there may be no advantage.

To find the maximum, use:

r = x ^ ((x ^ y) & -(x < y)); // max(x, y)

Quick and dirty versions:

If you know that INT_MIN <= x – y <= INT_MAX, then you can use the following, which are faster because (x – y) only needs to be evaluated once.

r = y + ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1))); // min(x, y)
r = x - ((x - y) & ((x - y) >> (sizeof(int) * CHAR_BIT - 1))); // max(x, y)

Note that the 1989 ANSI C specification doesn’t specify the result of signed right-shift, so these aren’t portable. If exceptions are thrown on overflows, then the values of x and y should be unsigned or cast to unsigned for the subtractions to avoid unnecessarily throwing an exception, however the right-shift needs a signed operand to produce all one bits when negative, so cast to signed there.

On March 7, 2003, Angus Duggan pointed out the right-shift portability issue. On May 3, 2005, Randal E. Bryant alerted me to the need for the precondition, INT_MIN <= x – y <= INT_MAX, and suggested the non-quick and dirty version as a fix. Both of these issues concern only the quick and dirty version. Nigel Horspoon observed on July 6, 2005 that gcc produced the same code on a Pentium as the obvious solution because of how it evaluates (x < y). On July 9, 2008 Vincent Lefèvre pointed out the potential for overflow exceptions with subtractions in r = y + ((x – y) & -(x < y)), which was the previous version. Timothy B. Terriberry suggested using xor rather than add and subract to avoid casting and the risk of overflows on June 2, 2009.

 


Determining if an integer is a power of 2

unsigned int v; // we want to see if v is a power of 2
bool f;         // the result goes here 

f = (v & (v - 1)) == 0;

Note that 0 is incorrectly considered a power of 2 here. To remedy this, use:

f = v && !(v & (v - 1));

Sign extending from a constant bit-width

Sign extension is automatic for built-in types, such as chars and ints. But suppose you have a signed two’s complement number, x, that is stored using only b bits. Moreover, suppose you want to convert x to an int, which has more than b bits. A simple copy will work if x is positive, but if negative, the sign must be extended. For example, if we have only 4 bits to store a number, then -3 is represented as 1101 in binary. If we have 8 bits, then -3 is 11111101. The most-significant bit of the 4-bit representation is replicated sinistrally to fill in the destination when we convert to a representation with more bits; this is sign extending. In C, sign extension from a constant bit-width is trivial, since bit fields may be specified in structs or unions. For example, to convert from 5 bits to an full integer:

int x; // convert this from using 5 bits to a full int
int r; // resulting sign extended number goes here
struct {signed int x:5;} s;
r = s.x = x;

The following is a C++ template function that uses the same language feature to convert from B bits in one operation (though the compiler is generating more, of course).

template <typename T, unsigned B>
inline T signextend(const T x)
{
  struct {T x:B;} s;
  return s.x = x;
}

int r = signextend<signed int,5>(x);  // sign extend 5 bit number x to r

John Byrd caught a typo in the code (attributed to html formatting) on May 2, 2005. On March 4, 2006, Pat Wood pointed out that the ANSI C standard requires that the bitfield have the keyword “signed” to be signed; otherwise, the sign is undefined.


Sign extending from a variable bit-width

Sometimes we need to extend the sign of a number but we don’t know a priori the number of bits, b, in which it is represented. (Or we could be programming in a language like Java, which lacks bitfields.)

unsigned b; // number of bits representing the number in x
int x;      // sign extend this b-bit number to r
int r;      // resulting sign-extended number
int const m = 1U << (b - 1); // mask can be pre-computed if b is fixed

x = x & ((1U << b) - 1);  // (Skip this if bits in x above position b are already zero.)
r = (x ^ m) - m;

The code above requires four operations, but when the bitwidth is a constant rather than variable, it requires only two fast operations, assuming the upper bits are already zeroes.

A slightly faster but less portable method that doesn’t depend on the bits in x above position b being zero is:

int const m = CHAR_BIT * sizeof(x) - b;
r = (x << m) >> m;

Sean A. Irvine suggested that I add sign extension methods to this page on June 13, 2004, and he provided m = (1 << (b - 1)) - 1; r = -(x & ~m) | x; as a starting point from which I optimized to get m = 1U << (b – 1); r = -(x & m) | x. But then on May 11, 2007, Shay Green suggested the version above, which requires one less operation than mine. Vipin Sharma suggested I add a step to deal with situations where x had possible ones in bits other than the b bits we wanted to sign-extend on Oct. 15, 2008. On December 31, 2009 Chris Pirazzi suggested I add the faster version, which requires two operations for constant bit-widths and three for variable widths.

 


Sign extending from a variable bit-width in 3 operations

The following may be slow on some machines, due to the effort required for multiplication and division. This version is 4 operations. If you know that your initial bit-width, b, is greater than 1, you might do this type of sign extension in 3 operations by using r = (x * multipliers[b]) / multipliers[b], which requires only one array lookup.

unsigned b; // number of bits representing the number in x
int x;      // sign extend this b-bit number to r
int r;      // resulting sign-extended number
#define M(B) (1U << ((sizeof(x) * CHAR_BIT) - B)) // CHAR_BIT=bits/byte
static int const multipliers[] = 
{
  0,     M(1),  M(2),  M(3),  M(4),  M(5),  M(6),  M(7),
  M(8),  M(9),  M(10), M(11), M(12), M(13), M(14), M(15),
  M(16), M(17), M(18), M(19), M(20), M(21), M(22), M(23),
  M(24), M(25), M(26), M(27), M(28), M(29), M(30), M(31),
  M(32)
}; // (add more if using more than 64 bits)
static int const divisors[] = 
{
  1,    ~M(1),  M(2),  M(3),  M(4),  M(5),  M(6),  M(7),
  M(8),  M(9),  M(10), M(11), M(12), M(13), M(14), M(15),
  M(16), M(17), M(18), M(19), M(20), M(21), M(22), M(23),
  M(24), M(25), M(26), M(27), M(28), M(29), M(30), M(31),
  M(32)
}; // (add more for 64 bits)
#undef M
r = (x * multipliers[b]) / divisors[b];

The following variation is not portable, but on architectures that employ an arithmetic right-shift, maintaining the sign, it should be fast.

const int s = -b; // OR:  sizeof(x) * CHAR_BIT - b;
r = (x << s) >> s;

Randal E. Bryant pointed out a bug on May 3, 2005 in an earlier version (that used multipliers[] for divisors[]), where it failed on the case of x=1 and b=1.

 


Conditionally set or clear bits without branching

bool f;         // conditional flag
unsigned int m; // the bit mask
unsigned int w; // the word to modify:  if (f) w |= m; else w &= ~m; 

w ^= (-f ^ w) & m;

// OR, for superscalar CPUs:
w = (w & ~m) | (-f & m);

On some architectures, the lack of branching can more than make up for what appears to be twice as many operations. For instance, informal speed tests on an AMD Athlon™ XP 2100+ indicated it was 5-10% faster. An Intel Core 2 Duo ran the superscalar version about 16% faster than the first. Glenn Slayden informed me of the first expression on December 11, 2003. Marco Yu shared the superscalar version with me on April 3, 2007 and alerted me to a typo 2 days later.

 


Conditionally negate a value without branching

If you need to negate only when a flag is false, then use the following to avoid branching:

bool fDontNegate;  // Flag indicating we should not negate v.
int v;             // Input value to negate if fDontNegate is false.
int r;             // result = fDontNegate ? v : -v;

r = (fDontNegate ^ (fDontNegate - 1)) * v;

If you need to negate only when a flag is true, then use this:

bool fNegate;  // Flag indicating if we should negate v.
int v;         // Input value to negate if fNegate is true.
int r;         // result = fNegate ? -v : v;

r = (v ^ -fNegate) + fNegate;

Avraham Plotnitzky suggested I add the first version on June 2, 2009. Motivated to avoid the multiply, I came up with the second version on June 8, 2009. Alfonso De Gregorio pointed out that some parens were missing on November 26, 2009, and received a bug bounty.

 


Merge bits from two values according to a mask

unsigned int a;    // value to merge in non-masked bits
unsigned int b;    // value to merge in masked bits
unsigned int mask; // 1 where bits from b should be selected; 0 where from a.
unsigned int r;    // result of (a & ~mask) | (b & mask) goes here

r = a ^ ((a ^ b) & mask);

This shaves one operation from the obvious way of combining two sets of bits according to a bit mask. If the mask is a constant, then there may be no advantage.

Ron Jeffery sent this to me on February 9, 2006.

 


Counting bits set (naive way)

unsigned int v; // count the number of bits set in v
unsigned int c; // c accumulates the total bits set in v

for (c = 0; v; v >>= 1)
{
  c += v & 1;
}

The naive approach requires one iteration per bit, until no more bits are set. So on a 32-bit word with only the high set, it will go through 32 iterations.

 


Counting bits set by lookup table

static const unsigned char BitsSetTable256[256] = 
{
#   define B2(n) n,     n+1,     n+1,     n+2
#   define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
#   define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
    B6(0), B6(1), B6(1), B6(2)
};

unsigned int v; // count the number of bits set in 32-bit value v
unsigned int c; // c is the total bits set in v

// Option 1:
c = BitsSetTable256[v & 0xff] + 
    BitsSetTable256[(v >> 8) & 0xff] + 
    BitsSetTable256[(v >> 16) & 0xff] + 
    BitsSetTable256[v >> 24]; 

// Option 2:
unsigned char * p = (unsigned char *) &v;
c = BitsSetTable256[p[0]] + 
    BitsSetTable256[p[1]] + 
    BitsSetTable256[p[2]] + 
    BitsSetTable256[p[3]];

// To initially generate the table algorithmically:
BitsSetTable256[0] = 0;
for (int i = 0; i < 256; i++)
{
  BitsSetTable256[i] = (i & 1) + BitsSetTable256[i / 2];
}

On July 14, 2009 Hallvard Furuseth suggested the macro compacted table.

 


Counting bits set, Brian Kernighan’s way

unsigned int v; // count the number of bits set in v
unsigned int c; // c accumulates the total bits set in v
for (c = 0; v; c++)
{
  v &= v - 1; // clear the least significant bit set
}

Brian Kernighan’s method goes through as many iterations as there are set bits. So if we have a 32-bit word with only the high bit set, then it will only go once through the loop.

Published in 1988, the C Programming Language 2nd Ed. (by Brian W. Kernighan and Dennis M. Ritchie) mentions this in exercise 2-9. On April 19, 2006 Don Knuth pointed out to me that this method “was first published by Peter Wegner in CACM 3 (1960), 322. (Also discovered independently by Derrick Lehmer and published in 1964 in a book edited by Beckenbach.)”

 


Counting bits set in 14, 24, or 32-bit words using 64-bit instructions

unsigned int v; // count the number of bits set in v
unsigned int c; // c accumulates the total bits set in v

// option 1, for at most 14-bit values in v:
c = (v * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;

// option 2, for at most 24-bit values in v:
c =  ((v & 0xfff) * 0x1001001001001ULL & 0x84210842108421ULL) % 0x1f;
c += (((v & 0xfff000) >> 12) * 0x1001001001001ULL & 0x84210842108421ULL) 
     % 0x1f;

// option 3, for at most 32-bit values in v:
c =  ((v & 0xfff) * 0x1001001001001ULL & 0x84210842108421ULL) % 0x1f;
c += (((v & 0xfff000) >> 12) * 0x1001001001001ULL & 0x84210842108421ULL) % 
     0x1f;
c += ((v >> 24) * 0x1001001001001ULL & 0x84210842108421ULL) % 0x1f;

This method requires a 64-bit CPU with fast modulus division to be efficient. The first option takes only 3 operations; the second option takes 10; and the third option takes 15.

Rich Schroeppel originally created a 9-bit version, similiar to option 1; see the Programming Hacks section of Beeler, M., Gosper, R. W., and Schroeppel, R. HAKMEM. MIT AI Memo 239, Feb. 29, 1972. His method was the inspiration for the variants above, devised by Sean Anderson. Randal E. Bryant offered a couple bug fixes on May 3, 2005. Bruce Dawson tweaked what had been a 12-bit version and made it suitable for 14 bits using the same number of operations on Feburary 1, 2007.

 

 


Counting bits set, in parallel

unsigned int v; // count bits set in this (32-bit value)
unsigned int c; // store the total here
static const int S[] = {1, 2, 4, 8, 16}; // Magic Binary Numbers
static const int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF};

c = v - ((v >> 1) & B[0]);
c = ((c >> S[1]) & B[1]) + (c & B[1]);
c = ((c >> S[2]) + c) & B[2];
c = ((c >> S[3]) + c) & B[3];
c = ((c >> S[4]) + c) & B[4];

The B array, expressed as binary, is:

B[0] = 0x55555555 = 01010101 01010101 01010101 01010101
B[1] = 0x33333333 = 00110011 00110011 00110011 00110011
B[2] = 0x0F0F0F0F = 00001111 00001111 00001111 00001111
B[3] = 0x00FF00FF = 00000000 11111111 00000000 11111111
B[4] = 0x0000FFFF = 00000000 00000000 11111111 11111111

We can adjust the method for larger integer sizes by continuing with the patterns for the Binary Magic Numbers, B and S. If there are k bits, then we need the arrays S and B to be ceil(lg(k)) elements long, and we must compute the same number of expressions for c as S or B are long. For a 32-bit v, 16 operations are used.

The best method for counting bits in a 32-bit integer v is the following:

v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
c = ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count

The best bit counting method takes only 12 operations, which is the same as the lookup-table method, but avoids the memory and potential cache misses of a table. It is a hybrid between the purely parallel method above and the earlier methods using multiplies (in the section on counting bits with 64-bit instructions), though it doesn’t use 64-bit instructions. The counts of bits set in the bytes is done in parallel, and the sum total of the bits set in the bytes is computed by multiplying by 0x1010101 and shifting right 24 bits.

A generalization of the best bit counting method to integers of bit-widths upto 128 (parameterized by type T) is this:

v = v - ((v >> 1) & (T)~(T)0/3);                           // temp
v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);      // temp
v = (v + (v >> 4)) & (T)~(T)0/255*15;                      // temp
c = (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT; // count

See Ian Ashdown’s nice newsgroup post for more information on counting the number of bits set (also known as sideways addition). The best bit counting method was brought to my attention on October 5, 2005 byAndrew Shapira; he found it in pages 187-188 of Software Optimization Guide for AMD Athlon™ 64 and Opteron™ Processors. Charlie Gordon suggested a way to shave off one operation from the purely parallel version on December 14, 2005, and Don Clugston trimmed three more from it on December 30, 2005. I made a typo with Don’s suggestion that Eric Cole spotted on January 8, 2006. Eric later suggested the arbitrary bit-width generalization to the best method on November 17, 2006. On April 5, 2007, Al Williams observed that I had a line of dead code at the top of the first method.

 


Count bits set (rank) from the most-significant bit upto a given position

The following finds the the rank of a bit, meaning it returns the sum of bits that are set to 1 from the most-signficant bit downto the bit at the given position.

  uint64_t v;       // Compute the rank (bits set) in v from the MSB to pos.
  unsigned int pos; // Bit position to count bits upto.
  uint64_t r;       // Resulting rank of bit at pos goes here.

  // Shift out bits after given position.
  r = v >> (sizeof(v) * CHAR_BIT - pos);
  // Count set bits in parallel.
  // r = (r & 0x5555...) + ((r >> 1) & 0x5555...);
  r = r - ((r >> 1) & ~0UL/3);
  // r = (r & 0x3333...) + ((r >> 2) & 0x3333...);
  r = (r & ~0UL/5) + ((r >> 2) & ~0UL/5);
  // r = (r & 0x0f0f...) + ((r >> 4) & 0x0f0f...);
  r = (r + (r >> 4)) & ~0UL/17;
  // r = r % 255;
  r = (r * (~0UL/255)) >> ((sizeof(v) - 1) * CHAR_BIT);

Juha Järvi sent this to me on November 21, 2009 as an inverse operation to the computing the bit position with the given rank, which follows.

 


Select the bit position (from the most-significant bit) with the given count (rank)

The following 64-bit code selects the position of the rth 1 bit when counting from the left. In other words if we start at the most significant bit and proceed to the right, counting the number of bits set to 1 until we reach the desired rank, r, then the position where we stop is returned. If the rank requested exceeds the count of bits set, then 64 is returned. The code may be modified for 32-bit or counting from the right.

  uint64_t v;          // Input value to find position with rank r.
  unsigned int r;      // Input: bit's desired rank [1-64].
  unsigned int s;      // Output: Resulting position of bit with rank r [1-64]
  uint64_t a, b, c, d; // Intermediate temporaries for bit count.
  unsigned int t;      // Bit count temporary.

  // Do a normal parallel bit count for a 64-bit integer,                     
  // but store all intermediate steps.                                        
  // a = (v & 0x5555...) + ((v >> 1) & 0x5555...);
  a =  v - ((v >> 1) & ~0UL/3);
  // b = (a & 0x3333...) + ((a >> 2) & 0x3333...);
  b = (a & ~0UL/5) + ((a >> 2) & ~0UL/5);
  // c = (b & 0x0f0f...) + ((b >> 4) & 0x0f0f...);
  c = (b + (b >> 4)) & ~0UL/0x11;
  // d = (c & 0x00ff...) + ((c >> 8) & 0x00ff...);
  d = (c + (c >> 8)) & ~0UL/0x101;
  t = (d >> 32) + (d >> 48);
  // Now do branchless select!                                                
  s  = 64;
  // if (r > t) {s -= 32; r -= t;}
  s -= ((t - r) & 256) >> 3; r -= (t & ((t - r) >> 8));
  t  = (d >> (s - 16)) & 0xff;
  // if (r > t) {s -= 16; r -= t;}
  s -= ((t - r) & 256) >> 4; r -= (t & ((t - r) >> 8));
  t  = (c >> (s - 8)) & 0xf;
  // if (r > t) {s -= 8; r -= t;}
  s -= ((t - r) & 256) >> 5; r -= (t & ((t - r) >> 8));
  t  = (b >> (s - 4)) & 0x7;
  // if (r > t) {s -= 4; r -= t;}
  s -= ((t - r) & 256) >> 6; r -= (t & ((t - r) >> 8));
  t  = (a >> (s - 2)) & 0x3;
  // if (r > t) {s -= 2; r -= t;}
  s -= ((t - r) & 256) >> 7; r -= (t & ((t - r) >> 8));
  t  = (v >> (s - 1)) & 0x1;
  // if (r > t) s--;
  s -= ((t - r) & 256) >> 8;
  s = 65 - s;

If branching is fast on your target CPU, consider uncommenting the if-statements and commenting the lines that follow them.

Juha Järvi sent this to me on November 21, 2009.

 


Computing parity the naive way

unsigned int v;       // word value to compute the parity of
bool parity = false;  // parity will be the parity of v

while (v)
{
  parity = !parity;
  v = v & (v - 1);
}

The above code uses an approach like Brian Kernigan’s bit counting, above. The time it takes is proportional to the number of bits set.

 


Compute parity by lookup table

static const bool ParityTable256[256] = 
{
#   define P2(n) n, n^1, n^1, n
#   define P4(n) P2(n), P2(n^1), P2(n^1), P2(n)
#   define P6(n) P4(n), P4(n^1), P4(n^1), P4(n)
    P6(0), P6(1), P6(1), P6(0)
};

unsigned char b;  // byte value to compute the parity of
bool parity = ParityTable256[b];

// OR, for 32-bit words:
unsigned int v;
v ^= v >> 16;
v ^= v >> 8;
bool parity = ParityTable256[v & 0xff];

// Variation:
unsigned char * p = (unsigned char *) &v;
parity = ParityTable256[p[0] ^ p[1] ^ p[2] ^ p[3]];

Randal E. Bryant encouraged the addition of the (admittedly) obvious last variation with variable p on May 3, 2005. Bruce Rawles found a typo in an instance of the table variable’s name on September 27, 2005, and he received a $10 bug bounty. On October 9, 2006, Fabrice Bellard suggested the 32-bit variations above, which require only one table lookup; the previous version had four lookups (one per byte) and were slower. On July 14, 2009 Hallvard Furuseth suggested the macro compacted table.

 


Compute parity of a byte using 64-bit multiply and modulus division

unsigned char b;  // byte value to compute the parity of
bool parity = 
  (((b * 0x0101010101010101ULL) & 0x8040201008040201ULL) % 0x1FF) & 1;

The method above takes around 4 operations, but only works on bytes.

 


Compute parity of word with a multiply

The following method computes the parity of the 32-bit value in only 8 operations using a multiply.

    unsigned int v; // 32-bit word
    v ^= v >> 1;
    v ^= v >> 2;
    v = (v & 0x11111111U) * 0x11111111U;
    return (v >> 28) & 1;

Also for 64-bits, 8 operations are still enough.

    unsigned long long v; // 64-bit word
    v ^= v >> 1;
    v ^= v >> 2;
    v = (v & 0x1111111111111111UL) * 0x1111111111111111UL;
    return (v >> 60) & 1;

Andrew Shapira came up with this and sent it to me on Sept. 2, 2007.


Compute parity in parallel

unsigned int v;  // word value to compute the parity of
v ^= v >> 16;
v ^= v >> 8;
v ^= v >> 4;
v &= 0xf;
return (0x6996 >> v) & 1;

The method above takes around 9 operations, and works for 32-bit words. It may be optimized to work just on bytes in 5 operations by removing the two lines immediately following “unsigned int v;”. The method first shifts and XORs the eight nibbles of the 32-bit value together, leaving the result in the lowest nibble of v. Next, the binary number 0110 1001 1001 0110 (0x6996 in hex) is shifted to the right by the value represented in the lowest nibble of v. This number is like a miniature 16-bit parity-table indexed by the low four bits in v. The result has the parity of v in bit 1, which is masked and returned.

Thanks to Mathew Hendry for pointing out the shift-lookup idea at the end on Dec. 15, 2002. That optimization shaves two operations off using only shifting and XORing to find the parity.

 


Swapping values with subtraction and addition

#define SWAP(a, b) ((&(a) == &(b)) || \
                    (((a) -= (b)), ((b) += (a)), ((a) = (b) - (a))))

This swaps the values of a and b without using a temporary variable. The initial check for a and b being the same location in memory may be omitted when you know this can’t happen. (The compiler may omit it anyway as an optimization.) If you enable overflows exceptions, then pass unsigned values so an exception isn’t thrown. The XOR method that follows may be slightly faster on some machines. Don’t use this with floating-point numbers (unless you operate on their raw integer representations).

Sanjeev Sivasankaran suggested I add this on June 12, 2007. Vincent Lefèvre pointed out the potential for overflow exceptions on July 9, 2008


Swapping values with XOR

#define SWAP(a, b) (((a) ^= (b)), ((b) ^= (a)), ((a) ^= (b)))

This is an old trick to exchange the values of the variables a and b without using extra space for a temporary variable.

On January 20, 2005, Iain A. Fleming pointed out that the macro above doesn’t work when you swap with the same memory location, such as SWAP(a[i], a[j]) with i == j. So if that may occur, consider defining the macro as (((a) == (b)) || (((a) ^= (b)), ((b) ^= (a)), ((a) ^= (b)))). On July 14, 2009, Hallvard Furuseth suggested that on some machines, (((a) ^ (b)) && ((b) ^= (a) ^= (b), (a) ^= (b))) might be faster, since the (a) ^ (b) expression is reused.

 


Swapping individual bits with XOR

unsigned int i, j; // positions of bit sequences to swap
unsigned int n;    // number of consecutive bits in each sequence
unsigned int b;    // bits to swap reside in b
unsigned int r;    // bit-swapped result goes here

unsigned int x = ((b >> i) ^ (b >> j)) & ((1U << n) - 1); // XOR temporary
r = b ^ ((x << i) | (x << j));

As an example of swapping ranges of bits suppose we have have b = 00101111 (expressed in binary) and we want to swap the n = 3 consecutive bits starting at i = 1 (the second bit from the right) with the 3 consecutive bits starting at j = 5; the result would be r = 11100011 (binary).

This method of swapping is similar to the general purpose XOR swap trick, but intended for operating on individual bits.  The variable x stores the result of XORing the pairs of bit values we want to swap, and then the bits are set to the result of themselves XORed with x.  Of course, the result is undefined if the sequences overlap.

On July 14, 2009 Hallvard Furuseth suggested that I change the 1 << n to 1U << n because the value was being assigned to an unsigned and to avoid shifting into a sign bit.

 


Reverse bits the obvious way

unsigned int v;     // input bits to be reversed
unsigned int r = v; // r will be reversed bits of v; first get LSB of v
int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end

for (v >>= 1; v; v >>= 1)
{   
  r <<= 1;
  r |= v & 1;
  s--;
}
r <<= s; // shift when v's highest bits are zero

On October 15, 2004, Michael Hoisie pointed out a bug in the original version. Randal E. Bryant suggested removing an extra operation on May 3, 2005. Behdad Esfabod suggested a slight change that eliminated one iteration of the loop on May 18, 2005. Then, on February 6, 2007, Liyong Zhou suggested a better version that loops while v is not 0, so rather than iterating over all bits it stops early.

 


Reverse bits in word by lookup table

static const unsigned char BitReverseTable256[256] = 
{
#   define R2(n)     n,     n + 2*64,     n + 1*64,     n + 3*64
#   define R4(n) R2(n), R2(n + 2*16), R2(n + 1*16), R2(n + 3*16)
#   define R6(n) R4(n), R4(n + 2*4 ), R4(n + 1*4 ), R4(n + 3*4 )
    R6(0), R6(2), R6(1), R6(3)
};

unsigned int v; // reverse 32-bit value, 8 bits at time
unsigned int c; // c will get v reversed

// Option 1:
c = (BitReverseTable256[v & 0xff] << 24) | 
    (BitReverseTable256[(v >> 8) & 0xff] << 16) | 
    (BitReverseTable256[(v >> 16) & 0xff] << 8) |
    (BitReverseTable256[(v >> 24) & 0xff]);

// Option 2:
unsigned char * p = (unsigned char *) &v;
unsigned char * q = (unsigned char *) &c;
q[3] = BitReverseTable256[p[0]]; 
q[2] = BitReverseTable256[p[1]]; 
q[1] = BitReverseTable256[p[2]]; 
q[0] = BitReverseTable256[p[3]];

The first method takes about 17 operations, and the second takes about 12, assuming your CPU can load and store bytes easily.

On July 14, 2009 Hallvard Furuseth suggested the macro compacted table.

 


Reverse the bits in a byte with 3 operations (64-bit multiply and modulus division):

unsigned char b; // reverse this (8-bit) byte

b = (b * 0x0202020202ULL & 0x010884422010ULL) % 1023;

The multiply operation creates five separate copies of the 8-bit byte pattern to fan-out into a 64-bit value. The AND operation selects the bits that are in the correct (reversed) positions, relative to each 10-bit groups of bits. The multiply and the AND operations copy the bits from the original byte so they each appear in only one of the 10-bit sets. The reversed positions of the bits from the original byte coincide with their relative positions within any 10-bit set. The last step, which involves modulus division by 2^10 – 1, has the effect of merging together each set of 10 bits (from positions 0-9, 10-19, 20-29, …) in the 64-bit value. They do not overlap, so the addition steps underlying the modulus division behave like or operations.

This method was attributed to Rich Schroeppel in the Programming Hacks section of Beeler, M., Gosper, R. W., and Schroeppel, R. HAKMEM. MIT AI Memo 239, Feb. 29, 1972.

 


Reverse the bits in a byte with 4 operations (64-bit multiply, no division):

unsigned char b; // reverse this byte

b = ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;

The following shows the flow of the bit values with the boolean variables a, b, c, d, e, f, g, and h, which comprise an 8-bit byte. Notice how the first multiply fans out the bit pattern to multiple copies, while the last multiply combines them in the fifth byte from the right.

                                                                                        abcd efgh (-> hgfe dcba)
*                                                      1000 0000  0010 0000  0000 1000  0000 0010 (0x80200802)
-------------------------------------------------------------------------------------------------
                                            0abc defg  h00a bcde  fgh0 0abc  defg h00a  bcde fgh0
&                                           0000 1000  1000 0100  0100 0010  0010 0001  0001 0000 (0x0884422110)
-------------------------------------------------------------------------------------------------
                                            0000 d000  h000 0c00  0g00 00b0  00f0 000a  000e 0000
*                                           0000 0001  0000 0001  0000 0001  0000 0001  0000 0001 (0x0101010101)
-------------------------------------------------------------------------------------------------
                                            0000 d000  h000 0c00  0g00 00b0  00f0 000a  000e 0000
                                 0000 d000  h000 0c00  0g00 00b0  00f0 000a  000e 0000
                      0000 d000  h000 0c00  0g00 00b0  00f0 000a  000e 0000
           0000 d000  h000 0c00  0g00 00b0  00f0 000a  000e 0000
0000 d000  h000 0c00  0g00 00b0  00f0 000a  000e 0000
-------------------------------------------------------------------------------------------------
0000 d000  h000 dc00  hg00 dcb0  hgf0 dcba  hgfe dcba  hgfe 0cba  0gfe 00ba  00fe 000a  000e 0000
>> 32
-------------------------------------------------------------------------------------------------
                                            0000 d000  h000 dc00  hg00 dcb0  hgf0 dcba  hgfe dcba  
&                                                                                       1111 1111
-------------------------------------------------------------------------------------------------
                                                                                        hgfe dcba

Note that the last two steps can be combined on some processors because the registers can be accessed as bytes; just multiply so that a register stores the upper 32 bits of the result and the take the low byte. Thus, it may take only 6 operations.

Devised by Sean Anderson, July 13, 2001.

 


Reverse the bits in a byte with 7 operations (no 64-bit):

b = ((b * 0x0802LU & 0x22110LU) | (b * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16;

Make sure you assign or cast the result to an unsigned char to remove garbage in the higher bits. Devised by Sean Anderson, July 13, 2001. Typo spotted and correction supplied by Mike Keith, January 3, 2002.

 


Reverse an N-bit quantity in parallel in 5 * lg(N) operations:

unsigned int v; // 32-bit word to reverse bit order

// swap odd and even bits
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
// swap consecutive pairs
v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
// swap nibbles ... 
v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
// swap bytes
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
// swap 2-byte long pairs
v = ( v >> 16             ) | ( v               << 16);

The following variation is also O(lg(N)), however it requires more operations to reverse v. Its virtue is in taking less slightly memory by computing the constants on the fly.

unsigned int s = sizeof(v) * CHAR_BIT; // bit size; must be power of 2 
unsigned int mask = ~0;         
while ((s >>= 1) > 0) 
{
  mask ^= (mask << s);
  v = ((v >> s) & mask) | ((v << s) & ~mask);
}

These methods above are best suited to situations where N is large. If you use the above with 64-bit ints (or larger), then you need to add more lines (following the pattern); otherwise only the lower 32 bits will be reversed and the result will be in the lower 32 bits.

See Dr. Dobb’s Journal 1983, Edwin Freed’s article on Binary Magic Numbers for more information. The second variation was suggested by Ken Raeburn on September 13, 2005. Veldmeijer mentioned that the first version could do without ANDS in the last line on March 19, 2006.

 


Compute modulus division by 1 << s without a division operator

const unsigned int n;          // numerator
const unsigned int s;
const unsigned int d = 1U << s; // So d will be one of: 1, 2, 4, 8, 16, 32, ...
unsigned int m;                // m will be n % d
m = n & (d - 1);

Most programmers learn this trick early, but it was included for the sake of completeness.

 


 

Compute modulus division by (1 << s) – 1 without a division operator

unsigned int n;                      // numerator
const unsigned int s;                // s > 0
const unsigned int d = (1 << s) - 1; // so d is either 1, 3, 7, 15, 31, ...).
unsigned int m;                      // n % d goes here.

for (m = n; n > d; n = m)
{
  for (m = 0; n; n >>= s)
  {
    m += n & d;
  }
}
// Now m is a value from 0 to d, but since with modulus division
// we want m to be 0 when it is d.
m = m == d ? 0 : m;

This method of modulus division by an integer that is one less than a power of 2 takes at most 5 + (4 + 5 * ceil(N / s)) * ceil(lg(N / s)) operations, where N is the number of bits in the numerator. In other words, it takes at most O(N * lg(N)) time.

Devised by Sean Anderson, August 15, 2001. Before Sean A. Irvine corrected me on June 17, 2004, I mistakenly commented that we could alternatively assign m = ((m + 1) & d) - 1; at the end. Michael Miller spotted a typo in the code April 25, 2005.

 


Compute modulus division by (1 << s) – 1 in parallel without a division operator

// The following is for a word size of 32 bits!

static const unsigned int M[] = 
{
  0x00000000, 0x55555555, 0x33333333, 0xc71c71c7,  
  0x0f0f0f0f, 0xc1f07c1f, 0x3f03f03f, 0xf01fc07f, 
  0x00ff00ff, 0x07fc01ff, 0x3ff003ff, 0xffc007ff,
  0xff000fff, 0xfc001fff, 0xf0003fff, 0xc0007fff,
  0x0000ffff, 0x0001ffff, 0x0003ffff, 0x0007ffff, 
  0x000fffff, 0x001fffff, 0x003fffff, 0x007fffff,
  0x00ffffff, 0x01ffffff, 0x03ffffff, 0x07ffffff,
  0x0fffffff, 0x1fffffff, 0x3fffffff, 0x7fffffff
};

static const unsigned int Q[][6] = 
{
  { 0,  0,  0,  0,  0,  0}, {16,  8,  4,  2,  1,  1}, {16,  8,  4,  2,  2,  2},
  {15,  6,  3,  3,  3,  3}, {16,  8,  4,  4,  4,  4}, {15,  5,  5,  5,  5,  5},
  {12,  6,  6,  6 , 6,  6}, {14,  7,  7,  7,  7,  7}, {16,  8,  8,  8,  8,  8},
  { 9,  9,  9,  9,  9,  9}, {10, 10, 10, 10, 10, 10}, {11, 11, 11, 11, 11, 11},
  {12, 12, 12, 12, 12, 12}, {13, 13, 13, 13, 13, 13}, {14, 14, 14, 14, 14, 14},
  {15, 15, 15, 15, 15, 15}, {16, 16, 16, 16, 16, 16}, {17, 17, 17, 17, 17, 17},
  {18, 18, 18, 18, 18, 18}, {19, 19, 19, 19, 19, 19}, {20, 20, 20, 20, 20, 20},
  {21, 21, 21, 21, 21, 21}, {22, 22, 22, 22, 22, 22}, {23, 23, 23, 23, 23, 23},
  {24, 24, 24, 24, 24, 24}, {25, 25, 25, 25, 25, 25}, {26, 26, 26, 26, 26, 26},
  {27, 27, 27, 27, 27, 27}, {28, 28, 28, 28, 28, 28}, {29, 29, 29, 29, 29, 29},
  {30, 30, 30, 30, 30, 30}, {31, 31, 31, 31, 31, 31}
};

static const unsigned int R[][6] = 
{
  {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000},
  {0x0000ffff, 0x000000ff, 0x0000000f, 0x00000003, 0x00000001, 0x00000001},
  {0x0000ffff, 0x000000ff, 0x0000000f, 0x00000003, 0x00000003, 0x00000003},
  {0x00007fff, 0x0000003f, 0x00000007, 0x00000007, 0x00000007, 0x00000007},
  {0x0000ffff, 0x000000ff, 0x0000000f, 0x0000000f, 0x0000000f, 0x0000000f},
  {0x00007fff, 0x0000001f, 0x0000001f, 0x0000001f, 0x0000001f, 0x0000001f},
  {0x00000fff, 0x0000003f, 0x0000003f, 0x0000003f, 0x0000003f, 0x0000003f},
  {0x00003fff, 0x0000007f, 0x0000007f, 0x0000007f, 0x0000007f, 0x0000007f},
  {0x0000ffff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff},
  {0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff, 0x000001ff}, 
  {0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff, 0x000003ff}, 
  {0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff, 0x000007ff}, 
  {0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff, 0x00000fff}, 
  {0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff, 0x00001fff}, 
  {0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff, 0x00003fff}, 
  {0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff, 0x00007fff}, 
  {0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff, 0x0000ffff}, 
  {0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff, 0x0001ffff}, 
  {0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff, 0x0003ffff}, 
  {0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff, 0x0007ffff},
  {0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff, 0x000fffff}, 
  {0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff, 0x001fffff}, 
  {0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff, 0x003fffff}, 
  {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}, 
  {0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff, 0x00ffffff},
  {0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff, 0x01ffffff}, 
  {0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff, 0x03ffffff}, 
  {0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff, 0x07ffffff},
  {0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff, 0x0fffffff},
  {0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff, 0x1fffffff}, 
  {0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff, 0x3fffffff}, 
  {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}
};

unsigned int n;       // numerator
const unsigned int s; // s > 0
const unsigned int d = (1 << s) - 1; // so d is either 1, 3, 7, 15, 31, ...).
unsigned int m;       // n % d goes here.

m = (n & M[s]) + ((n >> s) & M[s]);

for (const unsigned int * q = &Q[s][0], * r = &R[s][0]; m > d; q++, r++)
{
  m = (m >> *q) + (m & *r);
}
m = m == d ? 0 : m; // OR, less portably: m = m & -((signed)(m - d) >> s);

This method of finding modulus division by an integer that is one less than a power of 2 takes at most O(lg(N)) time, where N is the number of bits in the numerator (32 bits, for the code above). The number of operations is at most 12 + 9 * ceil(lg(N)). The tables may be removed if you know the denominator at compile time; just extract the few relevent entries and unroll the loop. It may be easily extended to more bits.

It finds the result by summing the values in base (1 << s) in parallel. First every other base (1 << s) value is added to the previous one. Imagine that the result is written on a piece of paper. Cut the paper in half, so that half the values are on each cut piece. Align the values and sum them onto a new piece of paper. Repeat by cutting this paper in half (which will be a quarter of the size of the previous one) and summing, until you cannot cut further. After performing lg(N/s/2) cuts, we cut no more; just continue to add the values and put the result onto a new piece of paper as before, while there are at least two s-bit values.

Devised by Sean Anderson, August 20, 2001. A typo was spotted by Randy E. Bryant on May 3, 2005 (after pasting the code, I had later added “unsinged” to a variable declaration). As in the previous hack, I mistakenly commented that we could alternatively assign m = ((m + 1) & d) - 1; at the end, and Don Knuth corrected me on April 19, 2006 and suggested m = m & -((signed)(m - d) >> s). On June 18, 2009 Sean Irvine proposed a change that used ((n >> s) & M[s]) instead of ((n & ~M[s]) >> s), which typically requires fewer operations because the M[s] constant is already loaded.

 


Find the log base 2 of an integer with the MSB N set in O(N) operations (the obvious way)

unsigned int v; // 32-bit word to find the log base 2 of
unsigned int r = 0; // r will be lg(v)

while (v >>= 1) // unroll for more speed...
{
  r++;
}

The log base 2 of an integer is the same as the position of the highest bit set (or most significant bit set, MSB). The following log base 2 methods are faster than this one.

 


Find the integer log base 2 of an integer with an 64-bit IEEE float

int v; // 32-bit integer to find the log base 2 of
int r; // result of log_2(v) goes here
union { unsigned int u[2]; double d; } t; // temp

t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] = 0x43300000;
t.u[__FLOAT_WORD_ORDER!=LITTLE_ENDIAN] = v;
t.d -= 4503599627370496.0;
r = (t.u[__FLOAT_WORD_ORDER==LITTLE_ENDIAN] >> 20) - 0x3FF;

The code above loads a 64-bit (IEEE-754 floating-point) double with a 32-bit integer (with no paddding bits) by storing the integer in the mantissa while the exponent is set to 252. From this newly minted double, 252 (expressed as a double) is subtracted, which sets the resulting exponent to the log base 2 of the input value, v. All that is left is shifting the exponent bits into position (20 bits right) and subtracting the bias, 0x3FF (which is 1023 decimal). This technique only takes 5 operations, but many CPUs are slow at manipulating doubles, and the endianess of the architecture must be accommodated.

Eric Cole sent me this on January 15, 2006. Evan Felix pointed out a typo on April 4, 2006. Vincent Lefèvre told me on July 9, 2008 to change the endian check to use the float’s endian, which could differ from the integer’s endian.

 


Find the log base 2 of an integer with a lookup table

static const char LogTable256[256] = 
{
#define LT(n) n, n, n, n, n, n, n, n, n, n, n, n, n, n, n, n
    -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
    LT(4), LT(5), LT(5), LT(6), LT(6), LT(6), LT(6),
    LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7), LT(7)
};

unsigned int v; // 32-bit word to find the log of
unsigned r;     // r will be lg(v)
register unsigned int t, tt; // temporaries

if (tt = v >> 16)
{
  r = (t = tt >> 8) ? 24 + LogTable256[t] : 16 + LogTable256[tt];
}
else 
{
  r = (t = v >> 8) ? 8 + LogTable256[t] : LogTable256[v];
}

The lookup table method takes only about 7 operations to find the log of a 32-bit value. If extended for 64-bit quantities, it would take roughly 9 operations. Another operation can be trimmed off by using four tables, with the possible additions incorporated into each. Using int table elements may be faster, depending on your architecture.

The code above is tuned to uniformly distributed output values. If your inputs are evenly distributed across all 32-bit values, then consider using the following:

if (tt = v >> 24) 
{
  r = 24 + LogTable256[tt];
} 
else if (tt = v >> 16) 
{
  r = 16 + LogTable256[tt];
} 
else if (tt = v >> 8) 
{
  r = 8 + LogTable256[tt];
} 
else 
{
  r = LogTable256[v];
}

To initially generate the log table algorithmically:

LogTable256[0] = LogTable256[1] = 0;
for (int i = 2; i < 256; i++) 
{
  LogTable256[i] = 1 + LogTable256[i / 2];
}
LogTable256[0] = -1; // if you want log(0) to return -1

Behdad Esfahbod and I shaved off a fraction of an operation (on average) on May 18, 2005. Yet another fraction of an operation was removed on November 14, 2006 by Emanuel Hoogeveen. The variation that is tuned to evenly distributed input values was suggested by David A. Butterfield on September 19, 2008. Venkat Reddy told me on January 5, 2009 that log(0) should return -1 to indicate an error, so I changed the first entry in the table to that.


Find the log base 2 of an N-bit integer in O(lg(N)) operations

unsigned int v;  // 32-bit value to find the log2 of 
const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
const unsigned int S[] = {1, 2, 4, 8, 16};
int i;

register unsigned int r = 0; // result of log2(v) will go here
for (i = 4; i >= 0; i--) // unroll for speed...
{
  if (v & b[i])
  {
    v >>= S[i];
    r |= S[i];
  } 
}

// OR (IF YOUR CPU BRANCHES SLOWLY):

unsigned int v;          // 32-bit value to find the log2 of 
register unsigned int r; // result of log2(v) will go here
register unsigned int shift;

r =     (v > 0xFFFF) << 4; v >>= r;
shift = (v > 0xFF  ) << 3; v >>= shift; r |= shift;
shift = (v > 0xF   ) << 2; v >>= shift; r |= shift;
shift = (v > 0x3   ) << 1; v >>= shift; r |= shift;
                                        r |= (v >> 1);

// OR (IF YOU KNOW v IS A POWER OF 2):

unsigned int v;  // 32-bit value to find the log2 of 
static const unsigned int b[] = {0xAAAAAAAA, 0xCCCCCCCC, 0xF0F0F0F0, 
                                 0xFF00FF00, 0xFFFF0000};
register unsigned int r = (v & b[0]) != 0;
for (i = 4; i > 0; i--) // unroll for speed...
{
  r |= ((v & b[i]) != 0) << i;
}

Of course, to extend the code to find the log of a 33- to 64-bit number, we would append another element, 0xFFFFFFFF00000000, to b, append 32 to S, and loop from 5 to 0. This method is much slower than the earlier table-lookup version, but if you don’t want big table or your architecture is slow to access memory, it’s a good choice. The second variation involves slightly more operations, but it may be faster on machines with high branch costs (e.g. PowerPC).

The second version was sent to me by Eric Cole on January 7, 2006. Andrew Shapira subsequently trimmed a few operations off of it and sent me his variation (above) on Sept. 1, 2007. The third variation was suggested to me by John Owens on April 24, 2002; it’s faster, but it is only suitable when the input is known to be a power of 2. On May 25, 2003, Ken Raeburn suggested improving the general case by using smaller numbers for b[], which load faster on some architectures (for instance if the word size is 16 bits, then only one load instruction may be needed). These values work for the general version, but not for the special-case version below it, where v is a power of 2; Glenn Slayden brought this oversight to my attention on December 12, 2003.

 


Find the log base 2 of an N-bit integer in O(lg(N)) operations with multiply and lookup

uint32_t v; // find the log base 2 of 32-bit v
int r;      // result goes here

static const int MultiplyDeBruijnBitPosition[32] = 
{
  0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
  8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31
};

v |= v >> 1; // first round down to one less than a power of 2 
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;

r = MultiplyDeBruijnBitPosition[(uint32_t)(v * 0x07C4ACDDU) >> 27];

The code above computes the log base 2 of a 32-bit integer with a small table lookup and multiply. It requires only 13 operations, compared to (up to) 20 for the previous method. The purely table-based method requires the fewest operations, but this offers a reasonable compromise between table size and speed.

If you know that v is a power of 2, then you only need the following:

static const int MultiplyDeBruijnBitPosition2[32] = 
{
  0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 
  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
r = MultiplyDeBruijnBitPosition2[(uint32_t)(v * 0x077CB531U) >> 27];

Eric Cole devised this January 8, 2006 after reading about the entry below to round up to a power of 2 and the method below for computing the number of trailing bits with a multiply and lookup using a DeBruijn sequence. On December 10, 2009, Mark Dickinson shaved off a couple operations by requiring v be rounded up to one less than the next power of 2 rather than the power of 2.

 


Find integer log base 10 of an integer

unsigned int v; // non-zero 32-bit integer value to compute the log base 10 of 
int r;          // result goes here
int t;          // temporary

static unsigned int const PowersOf10[] = 
    {1, 10, 100, 1000, 10000, 100000,
     1000000, 10000000, 100000000, 1000000000};

t = (IntegerLogBase2(v) + 1) * 1233 >> 12; // (use a lg2 method from above)
r = t - (v < PowersOf10[t]);

The integer log base 10 is computed by first using one of the techniques above for finding the log base 2. By the relationship log10(v) = log2(v) / log2(10), we need to multiply it by 1/log2(10), which is approximately 1233/4096, or 1233 followed by a right shift of 12. Adding one is needed because the IntegerLogBase2 rounds down. Finally, since the value t is only an approximation that may be off by one, the exact value is found by subtracting the result of v < PowersOf10[t].

This method takes 6 more operations than IntegerLogBase2. It may be sped up (on machines with fast memory access) by modifying the log base 2 table-lookup method above so that the entries hold what is computed for t (that is, pre-add, -mulitply, and -shift). Doing so would require a total of only 9 operations to find the log base 10, assuming 4 tables were used (one for each byte of v).

Eric Cole suggested I add a version of this on January 7, 2006.

 


Find integer log base 10 of an integer the obvious way

unsigned int v; // non-zero 32-bit integer value to compute the log base 10 of 
int r;          // result goes here

r = (v >= 1000000000) ? 9 : (v >= 100000000) ? 8 : (v >= 10000000) ? 7 : 
    (v >= 1000000) ? 6 : (v >= 100000) ? 5 : (v >= 10000) ? 4 : 
    (v >= 1000) ? 3 : (v >= 100) ? 2 : (v >= 10) ? 1 : 0;

This method works well when the input is uniformly distributed over 32-bit values because 76% of the inputs are caught by the first compare, 21% are caught by the second compare, 2% are caught by the third, and so on (chopping the remaining down by 90% with each comparision). As a result, less than 2.6 operations are needed on average.

On April 18, 2007, Emanuel Hoogeveen suggested a variation on this where the conditions used divisions, which were not as fast as simple comparisons.


Find integer log base 2 of a 32-bit IEEE float

const float v; // find int(log2(v)), where v > 0.0 && finite(v) && isnormal(v)
int c;         // 32-bit int c gets the result;

c = *(const int *) &v;  // OR, for portability:  memcpy(&c, &v, sizeof c);
c = (c >> 23) - 127;

The above is fast, but IEEE 754-compliant architectures utilize subnormal (also called denormal) floating point numbers. These have the exponent bits set to zero (signifying pow(2,-127)), and the mantissa is not normalized, so it contains leading zeros and thus the log2 must be computed from the mantissa. To accomodate for subnormal numbers, use the following:

const float v;              // find int(log2(v)), where v > 0.0 && finite(v)
int c;                      // 32-bit int c gets the result;
int x = *(const int *) &v;  // OR, for portability:  memcpy(&x, &v, sizeof x);

c = x >> 23;          

if (c)
{
  c -= 127;
}
else
{ // subnormal, so recompute using mantissa: c = intlog2(x) - 149;
  register unsigned int t; // temporary
  // Note that LogTable256 was defined earlier
  if (t = x >> 16)
  {
    c = LogTable256[t] - 133;
  }
  else
  {
    c = (t = x >> 8) ? LogTable256[t] - 141 : LogTable256[x] - 149;
  }
}

On June 20, 2004, Sean A. Irvine suggested that I include code to handle subnormal numbers. On June 11, 2005, Falk Hüffner pointed out that ISO C99 6.5/7 specified undefined behavior for the common type punning idiom *(int *)&, though it has worked on 99.9% of C compilers. He proposed using memcpy for maximum portability or a union with a float and an int for better code generation than memcpy on some compilers.

 


Find integer log base 2 of the pow(2, r)-root of a 32-bit IEEE float (for unsigned integer r)

const int r;
const float v; // find int(log2(pow((double) v, 1. / pow(2, r)))), 
               // where isnormal(v) and v > 0
int c;         // 32-bit int c gets the result;

c = *(const int *) &v;  // OR, for portability:  memcpy(&c, &v, sizeof c);
c = ((((c - 0x3f800000) >> r) + 0x3f800000) >> 23) - 127;

So, if r is 0, for example, we have c = int(log2((double) v)). If r is 1, then we have c = int(log2(sqrt((double) v))). If r is 2, then we have c = int(log2(pow((double) v, 1./4))).

On June 11, 2005, Falk Hüffner pointed out that ISO C99 6.5/7 left the type punning idiom *(int *)& undefined, and he suggested using memcpy.

 


Count the consecutive zero bits (trailing) on the right linearly

unsigned int v;  // input to count trailing zero bits
int c;  // output: c will count v's trailing zero bits,
        // so if v is 1101000 (base 2), then c will be 3
if (v)
{
  v = (v ^ (v - 1)) >> 1;  // Set v's trailing 0s to 1s and zero rest
  for (c = 0; v; c++)
  {
    v >>= 1;
  }
}
else
{
  c = CHAR_BIT * sizeof(v);
}

The average number of trailing zero bits in a (uniformly distributed) random binary number is one, so this O(trailing zeros) solution isn’t that bad compared to the faster methods below.

Jim Cole suggested I add a linear-time method for counting the trailing zeros on August 15, 2007. On October 22, 2007, Jason Cunningham pointed out that I had neglected to paste the unsigned modifier for v.


Count the consecutive zero bits (trailing) on the right in parallel

unsigned int v;      // 32-bit word input to count zero bits on right
unsigned int c = 32; // c will be the number of zero bits on the right
v &= -signed(v);
if (v) c--;
if (v & 0x0000FFFF) c -= 16;
if (v & 0x00FF00FF) c -= 8;
if (v & 0x0F0F0F0F) c -= 4;
if (v & 0x33333333) c -= 2;
if (v & 0x55555555) c -= 1;

Here, we are basically doing the same operations as finding the log base 2 in parallel, but we first isolate the lowest 1 bit, and then proceed with c starting at the maximum and decreasing. The number of operations is at most 3 * lg(N) + 4, roughly, for N bit words.

Bill Burdick suggested an optimization, reducing the time from 4 * lg(N) on February 4, 2011.

 


Count the consecutive zero bits (trailing) on the right by binary search

unsigned int v;     // 32-bit word input to count zero bits on right
unsigned int c;     // c will be the number of zero bits on the right,
                    // so if v is 1101000 (base 2), then c will be 3
// NOTE: if 0 == v, then c = 31.
if (v & 0x1) 
{
  // special case for odd v (assumed to happen half of the time)
  c = 0;
}
else
{
  c = 1;
  if ((v & 0xffff) == 0) 
  {  
    v >>= 16;  
    c += 16;
  }
  if ((v & 0xff) == 0) 
  {  
    v >>= 8;  
    c += 8;
  }
  if ((v & 0xf) == 0) 
  {  
    v >>= 4;
    c += 4;
  }
  if ((v & 0x3) == 0) 
  {  
    v >>= 2;
    c += 2;
  }
  c -= v & 0x1;
}

The code above is similar to the previous method, but it computes the number of trailing zeros by accumulating c in a manner akin to binary search. In the first step, it checks if the bottom 16 bits of v are zeros, and if so, shifts v right 16 bits and adds 16 to c, which reduces the number of bits in v to consider by half. Each of the subsequent conditional steps likewise halves the number of bits until there is only 1. This method is faster than the last one (by about 33%) because the bodies of the if statements are executed less often.

Matt Whitlock suggested this on January 25, 2006. Andrew Shapira shaved a couple operations off on Sept. 5, 2007 (by setting c=1 and unconditionally subtracting at the end).

 


Count the consecutive zero bits (trailing) on the right by casting to a float

unsigned int v;            // find the number of trailing zeros in v
int r;                     // the result goes here
float f = (float)(v & -v); // cast the least significant bit in v to a float
r = (*(uint32_t *)&f >> 23) - 0x7f;

Although this only takes about 6 operations, the time to convert an integer to a float can be high on some machines. The exponent of the 32-bit IEEE floating point representation is shifted down, and the bias is subtracted to give the position of the least significant 1 bit set in v. If v is zero, then the result is -127.

 


Count the consecutive zero bits (trailing) on the right with modulus division and lookup

unsigned int v;  // find the number of trailing zeros in v
int r;           // put the result in r
static const int Mod37BitPosition[] = // map a bit value mod 37 to its position
{
  32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, 4,
  7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, 5,
  20, 8, 19, 18
};
r = Mod37BitPosition[(-v & v) % 37];

The code above finds the number of zeros that are trailing on the right, so binary 0100 would produce 2. It makes use of the fact that the first 32 bit position values are relatively prime with 37, so performing a modulus division with 37 gives a unique number from 0 to 36 for each. These numbers may then be mapped to the number of zeros using a small lookup table. It uses only 4 operations, however indexing into a table and performing modulus division may make it unsuitable for some situations. I came up with this independently and then searched for a subsequence of the table values, and found it was invented earlier by Reiser, according to Hacker’s Delight.

 


Count the consecutive zero bits (trailing) on the right with multiply and lookup

unsigned int v;  // find the number of trailing zeros in 32-bit v 
int r;           // result goes here
static const int MultiplyDeBruijnBitPosition[32] = 
{
  0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 
  31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
r = MultiplyDeBruijnBitPosition[((uint32_t)((v & -v) * 0x077CB531U)) >> 27];

Converting bit vectors to indices of set bits is an example use for this. It requires one more operation than the earlier one involving modulus division, but the multiply may be faster. The expression (v & -v) extracts the least significant 1 bit from v. The constant 0x077CB531UL is a de Bruijn sequence, which produces a unique pattern of bits into the high 5 bits for each possible bit position that it is multiplied against. When there are no bits set, it returns 0. More information can be found by reading the paper Using de Bruijn Sequences to Index 1 in a Computer Word by Charles E. Leiserson, Harald Prokof, and Keith H. Randall.

On October 8, 2005 Andrew Shapira suggested I add this. Dustin Spicuzza asked me on April 14, 2009 to cast the result of the multiply to a 32-bit type so it would work when compiled with 64-bit ints.

 


Round up to the next highest power of 2 by float casting

unsigned int const v; // Round this 32-bit value to the next highest power of 2
unsigned int r;       // Put the result here. (So v=3 -> r=4; v=8 -> r=8)

if (v > 1) 
{
  float f = (float)v;
  unsigned int const t = 1U << ((*(unsigned int *)&f >> 23) - 0x7f);
  r = t << (t < v);
}
else 
{
  r = 1;
}

The code above uses 8 operations, but works on all v <= (1<<31).

Quick and dirty version, for domain of 1 < v < (1<<25):

float f = (float)(v - 1);  
r = 1U << ((*(unsigned int*)(&f) >> 23) - 126);

Although the quick and dirty version only uses around 6 operations, it is roughly three times slower than the technique below (which involves 12 operations) when benchmarked on an Athlon™ XP 2100+ CPU. Some CPUs will fare better with it, though.

On September 27, 2005 Andi Smithers suggested I include a technique for casting to floats to find the lg of a number for rounding up to a power of 2. Similar to the quick and dirty version here, his version worked with values less than (1<<25), due to mantissa rounding, but it used one more operation.

 


Round up to the next highest power of 2

unsigned int v; // compute the next highest power of 2 of 32-bit v

v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v++;

In 12 operations, this code computes the next highest power of 2 for a 32-bit integer. The result may be expressed by the formula 1U << (lg(v – 1) + 1). Note that in the edge case where v is 0, it returns 0, which isn’t a power of 2; you might append the expression v += (v == 0) to remedy this if it matters. It would be faster by 2 operations to use the formula and the log base 2 method that uses a lookup table, but in some situations, lookup tables are not suitable, so the above code may be best. (On a Athlon™ XP 2100+ I’ve found the above shift-left and then OR code is as fast as using a single BSR assembly language instruction, which scans in reverse to find the highest set bit.) It works by copying the highest set bit to all of the lower bits, and then adding one, which results in carries that set all of the lower bits to 0 and one bit beyond the highest set bit to 1. If the original number was a power of 2, then the decrement will reduce it to one less, so that we round up to the same original value.

You might alternatively compute the next higher power of 2 in only 8 or 9 operations using a lookup table for floor(lg(v)) and then evaluating 1<<(1+floor(lg(v))); Atul Divekar suggested I mention this on September 5, 2010.

Devised by Sean Anderson, Sepember 14, 2001. Pete Hart pointed me to a couple newsgroup posts by him and William Lewis in February of 1997, where they arrive at the same algorithm.

 


Interleave bits the obvious way

unsigned short x;   // Interleave bits of x and y, so that all of the
unsigned short y;   // bits of x are in the even positions and y in the odd;
unsigned int z = 0; // z gets the resulting Morton Number.

for (int i = 0; i < sizeof(x) * CHAR_BIT; i++) // unroll for more speed...
{
  z |= (x & 1U << i) << i | (y & 1U << i) << (i + 1);
}

Interleaved bits (aka Morton numbers) are useful for linearizing 2D integer coordinates, so x and y are combined into a single number that can be compared easily and has the property that a number is usually close to another if their x and y values are close.

 


Interleave bits by table lookup

static const unsigned short MortonTable256[256] = 
{
  0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015, 
  0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055, 
  0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115, 
  0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155, 
  0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415, 
  0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455, 
  0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515, 
  0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555, 
  0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015, 
  0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055, 
  0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115, 
  0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155, 
  0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415, 
  0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455, 
  0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515, 
  0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555, 
  0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015, 
  0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055, 
  0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115, 
  0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155, 
  0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415, 
  0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455, 
  0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515, 
  0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555, 
  0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015, 
  0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055, 
  0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115, 
  0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155, 
  0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415, 
  0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455, 
  0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515, 
  0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
};

unsigned short x; // Interleave bits of x and y, so that all of the
unsigned short y; // bits of x are in the even positions and y in the odd;
unsigned int z;   // z gets the resulting 32-bit Morton Number.

z = MortonTable256[y >> 8]   << 17 | 
    MortonTable256[x >> 8]   << 16 |
    MortonTable256[y & 0xFF] <<  1 | 
    MortonTable256[x & 0xFF];

For more speed, use an additional table with values that are MortonTable256 pre-shifted one bit to the left. This second table could then be used for the y lookups, thus reducing the operations by two, but almost doubling the memory required. Extending this same idea, four tables could be used, with two of them pre-shifted by 16 to the left of the previous two, so that we would only need 11 operations total.


Interleave bits with 64-bit multiply

In 11 operations, this version interleaves bits of two bytes (rather than shorts, as in the other versions), but many of the operations are 64-bit multiplies so it isn’t appropriate for all machines. The input parameters, x and y, should be less than 256.

unsigned char x;  // Interleave bits of (8-bit) x and y, so that all of the
unsigned char y;  // bits of x are in the even positions and y in the odd;
unsigned short z; // z gets the resulting 16-bit Morton Number.

z = ((x * 0x0101010101010101ULL & 0x8040201008040201ULL) * 
     0x0102040810204081ULL >> 49) & 0x5555 |
    ((y * 0x0101010101010101ULL & 0x8040201008040201ULL) * 
     0x0102040810204081ULL >> 48) & 0xAAAA;

Holger Bettag was inspired to suggest this technique on October 10, 2004 after reading the multiply-based bit reversals here.

 


Interleave bits by Binary Magic Numbers

static const unsigned int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF};
static const unsigned int S[] = {1, 2, 4, 8};

unsigned int x; // Interleave lower 16 bits of x and y, so the bits of x
unsigned int y; // are in the even positions and bits from y in the odd;
unsigned int z; // z gets the resulting 32-bit Morton Number.  
                // x and y must initially be less than 65536.

x = (x | (x << S[3])) & B[3];
x = (x | (x << S[2])) & B[2];
x = (x | (x << S[1])) & B[1];
x = (x | (x << S[0])) & B[0];

y = (y | (y << S[3])) & B[3];
y = (y | (y << S[2])) & B[2];
y = (y | (y << S[1])) & B[1];
y = (y | (y << S[0])) & B[0];

z = x | (y << 1);

Determine if a word has a zero byte

// Fewer operations:
unsigned int v; // 32-bit word to check if any 8-bit byte in it is 0
bool hasZeroByte = ~((((v & 0x7F7F7F7F) + 0x7F7F7F7F) | v) | 0x7F7F7F7F);

The code above may be useful when doing a fast string copy in which a word is copied at a time; it uses 5 operations. On the other hand, testing for a null byte in the obvious ways (which follow) have at least 7 operations (when counted in the most sparing way), and at most 12.

// More operations:
bool hasNoZeroByte = ((v & 0xff) && (v & 0xff00) && (v & 0xff0000) && (v & 0xff000000))
// OR:
unsigned char * p = (unsigned char *) &v;  
bool hasNoZeroByte = *p && *(p + 1) && *(p + 2) && *(p + 3);

The code at the beginning of this section (labeled “Fewer operations”) works by first zeroing the high bits of the 4 bytes in the word. Subsequently, it adds a number that will result in an overflow to the high bit of a byte if any of the low bits were initialy set. Next the high bits of the original word are ORed with these values; thus, the high bit of a byte is set iff any bit in the byte was set. Finally, we determine if any of these high bits are zero by ORing with ones everywhere except the high bits and inverting the result. Extending to 64 bits is trivial; simply increase the constants to be 0x7F7F7F7F7F7F7F7F.

For an additional improvement, a fast pretest that requires only 4 operations may be performed to determine if the word may have a zero byte. The test also returns true if the high byte is 0x80, so there are occasional false positives, but the slower and more reliable version above may then be used on candidates for an overall increase in speed with correct output.

 

bool hasZeroByte = ((v + 0x7efefeff) ^ ~v) & 0x81010100;
if (hasZeroByte) // or may just have 0x80 in the high byte
{
  hasZeroByte = ~((((v & 0x7F7F7F7F) + 0x7F7F7F7F) | v) | 0x7F7F7F7F);
}

There is yet a faster method — use hasless(v, 1), which is defined below; it works in 4 operations and requires no subsquent verification. It simplifies to

#define haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)

The subexpression (v – 0x01010101UL), evaluates to a high bit set in any byte whenever the corresponding byte in v is zero or greater than 0x80. The sub-expression ~v & 0x80808080UL evaluates to high bits set in bytes where the byte of v doesn’t have its high bit set (so the byte was less than 0x80). Finally, by ANDing these two sub-expressions the result is the high bits set where the bytes in v were zero, since the high bits set due to a value greater than 0x80 in the first sub-expression are masked off by the second.

Paul Messmer suggested the fast pretest improvement on October 2, 2004. Juha Järvi later suggested hasless(v, 1) on April 6, 2005, which he found on P

CCI题目4-3:Tree from Sorted Array

直接二分数组,第一次二分得到的是根节点,对左半部分二分出来的树是左子树,有半部分二分出来的树是右子树,这样递归就可以了。



题目
Given a sorted (increasing order) array, write an algorithm to create a binary tree with minimal height.



代码


//
//  main.cpp
//  CCI.4.3.Tree from Sorted Array
//
//  Created by Qiu Xiangyu on 12-11-24.
//  Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

#include 
#include 
using namespace std;


struct TreeNode {
    int value;
    TreeNode *left;
    TreeNode *right;
    TreeNode(int val, TreeNode *l, TreeNode *r) {
        value = val;
        left = l;
        right = r;
    }
    TreeNode(int val = 0) {
        value = val;
        left = NULL;
        right = NULL;
    }
};

TreeNode *buildTreeFromSortedArray(vector &arr, size_t istart, size_t iend) {
    if (istart > iend) {
        return NULL;
    }
    int imid = (istart + iend + 1) / 2;
    TreeNode *root = new TreeNode(arr[imid]);
    root->left = buildTreeFromSortedArray(arr, istart, imid - 1);
    root->right = buildTreeFromSortedArray(arr, imid + 1, iend);
    return root;
}
TreeNode *buildTreeFromSortedArray(vector &arr) {
    return buildTreeFromSortedArray(arr, 0, arr.size() - 1);
}

int main(int argc, const char * argv[])
{
    // insert code here...
    std::cout << "Hello, World!\n";
    vector arr = {1,2,3,4,5,6,7,8,9,10};
    TreeNode *root = buildTreeFromSortedArray(arr);
    return 0;
}

CCI题目4-2:判断图中的两个节点是否连通

用广度优先遍历做就行了。
代码只写了核心,旁边的额辅助函数只定义了而已。



题目
Given a directed graph, design an algorithm to find out whether there is a route be- tween two nodes.



代码


//
//  main.cpp
//  CCI.4.2.Reachable in Graph
//
//  Created by Qiu Xiangyu on 12-11-24.
//  Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

#include 
#include 
#include 
using namespace std;
enum VisitState {
    Unvisite = 0,
    Visiting = 1,
    Visited = 2
    };
class GraphNode {
public:
    VisitState visiteState;
};
class Graph {
    
    
public:
    void setAllVisiteState(VisitState targetState);
    list adjacentNodes(GraphNode *node);
    bool isNodeInGraph(GraphNode *node);
    bool isNodeConnectToNode(GraphNode *sourceNode, GraphNode *targetNode) {
        if (!isNodeInGraph(sourceNode) || !isNodeInGraph(targetNode)) {
            return false;
        }
        setAllVisiteState(Unvisite);
        //unvisit, not visited at all
        //visiting, in the queue, but not examed and expaned
        //vivited, expanded and visited.
        queue q;
        q.push(sourceNode);
        while (q.size()) {
            GraphNode *curNode = q.front();
            q.pop();
            if (curNode->visiteState == Visited) {
                continue;
            }
            if (curNode == targetNode) {
                return true;
            }
            curNode->visiteState = Visited;
            list adjacents = adjacentNodes(curNode);
            for (list::iterator it = adjacents.begin(); it != adjacents.end(); ++it) {
                GraphNode *cnode = *it;
                if (cnode->visiteState == Unvisite) {
                    cnode->visiteState = Visiting;
                    q.push(cnode);
                }
            }
        }
        return false;
    }
};

int main(int argc, const char * argv[])
{
    // insert code here...
    std::cout << "Hello, World!\n";
    return 0;
}

CCI题目4-1:Check Balance of a Binary Tree,二叉树的基本算法

这里我把题目理解为,任意两个叶子节点(左右子树都空)的深度不超过1.
那就计算一下一棵树的叶子节点中最大的深度和最小的深度,两者不超过1即可。

PS.但是按照书上的solution,代码翻译出来的题意,对于叶子节点的理解是说Null节点。



代码中还包含了这一部分的一些基本代码,包括:
1.从string初始化一棵树(借鉴leetcode的表示方法)
2.递归实现的inorder,preorder,postorder深度优先遍历
3.queue实现的广度优先遍历



题目
Implement a function to check if a tree is balanced. For the purposes of this question, a balanced tree is defined to be a tree such that no two leaf nodes differ in distance from the root by more than one.



Code

<

pre>
//
// main.cpp
// CCI.4.1.Check Balance of a Tree
//
// Created by Qiu Xiangyu on 12-11-21.
// Copyright (c) 2012年 Qiu Xiangyu. All rights reserved.
//

include

include

using namespace std;

struct TreeNode {
int value;
TreeNode *left;
TreeNode *right;
TreeNode(int val, TreeNode *l, TreeNode *r) {
value = val;
left = l;
right = r;
}
TreeNode(int val = 0) {
value = val;
left = NULL;
right = NULL;
}
};

TreeNode *buildTreeFromString(string s) {
if (s.size() == 0 || s[0] == ‘#’) {
return NULL;
}
TreeNode *root = NULL;
queue<TreeNode**> que;
que.push(&root);
int v = 0;
bool isnull = false;
int istr = 0;
while (true) {
if (istr >= s.size() || s[istr] == ‘,’) {

        TreeNode **ppnode = que.front();
        que.pop();

        if (ppnode && !isnull) {
            TreeNode *newNode = new TreeNode(v);
            *ppnode = newNode;
            que.push(&newNode->left);
            que.push(&newNode->right);
        } else {
            que.push(NULL);
            que.push(NULL);
        }

        if (istr >= s.size()) {
            break;
        }

        v = 0;
    } else {
        if (s[istr] == '#') {
            v = 0;
            isnull = true;
        } else {
            v = 10 * v + s[istr] - '0';
            isnull = false;
        }
    }
    ++istr;
}
return root;

}

void inorder(TreeNode root, void (nodeHandler)(TreeNode* node) ) {
if (root == NULL) {
cout<<“null\n”;
return;
}
inorder(root->left, nodeHandler);
nodeHandler(root);
inorder(root->right, nodeHandler);
}

void preorder(TreeNode root, void (nodeHandler)(TreeNode* node) ) {
if (root == NULL) {
cout<<“null\n”;
return;
}
nodeHandler(root);
preorder(root->left, nodeHandler);
preorder(root->right, nodeHandler);
}

void postorder(TreeNode root, void (nodeHandler)(TreeNode* node) ) {
if (root == NULL) {
cout<<“null\n”;
return;
}
postorder(root->left, nodeHandler);
postorder(root->right, nodeHandler);
nodeHandler(root);
}

void breadthfirst(TreeNode root, void (nodeHandler)(TreeNode* node)) {
queue<TreeNode*> que;
que.push(root);
int count = root ? 1 : 0;
while (que.size()) {
TreeNode *node = que.front();
que.pop();
if (node) {
–count;
nodeHandler(node);
que.push(node->left);
que.push(node->right);
if (node->left) {
++count;
}
if (node->right) {
++count;
}
} else {
cout<<“n\n”;
que.push(NULL);
que.push(NULL);
}
if (0 == count) {
break;
}
}
}

void printNode(TreeNode *node) {
cout<value<<endl;
}

int mindepth(TreeNode *node) {
if (NULL == node) {
return 0;
}
int rightMin = mindepth(node->right);
if (node->left) {
int leftMin = mindepth(node->left);
if (node->right) {
return 1 + (leftMin < rightMin ? leftMin : rightMin);
} else {
return 1 + leftMin;
}
} else {
return 1 + rightMin;
}
}
int maxdepth(TreeNode *node) {
if (NULL == node) {
return 0;
}
int leftmax = maxdepth(node->left);
int rightmax = maxdepth(node->right);
return 1 + (leftmax > rightmax ? leftmax : rightmax);
}
bool isBalance(TreeNode *root) {
int dmax = maxdepth(root);
int dmin = mindepth(root);
return abs(dmax – dmin) <= 1;
}
int main(int argc, const char * argv[])
{
// insert code here…
std::cout << “Hello, World!\n”;
string instr = “1,2,3,4,#,#,#,5”;
TreeNode *root = buildTreeFromString(instr);
breadthfirst(root, printNode);
int dmax = maxdepth(root);
int dmin = mindepth(root);
cout<<“dmax:”<<dmax<<“,dmin:”<<dmin<<endl;
cout<<“Is balance:”<<isBalance(root)<<endl;
return 0;
}