基础算法总结

算法

1、给定一个8*8的方格子，A点到B点的最短路径有多少条？用算法实现。（回溯法）

广度优先搜索只能找出一条最短路径
8*8方格

答：从图中可以看出，A点到B点的最短路径为16，即A点横走8小格，纵走8小格才能最快到达B点，这是排列组合的问题，即从最短路径16中选取8个横走的小格子(或者从最短路径16中选取8个纵走的小格子)。所以从A点到B点的最短路径条数，直接可以算出来，即为：

代码如下：

size_t g_num = 0;  //统计A点到B点的最短路径条数
void shortestPathNumber(char grid[9][9], int row, int col, int &step)
{
    if (row < 0 || row > 8 || col < 0 || col > 8 || grid[row][col] == '*' || step > 16)
    {
        return;
    }
    if (row == 0 && col == 8) 
    {
        if (step == 16)  //已到达B点，且等于最短路径16，就累加
        {
            g_num++;
        }
    }
    else
    {
        grid[row][col] = '*'; //标记该点已访问
        step++;
        shortestPathNumber(grid, row, col + 1, step); //向4个方向走
        shortestPathNumber(grid, row + 1, col, step);
        shortestPathNumber(grid, row, col - 1, step);
        shortestPathNumber(grid, row - 1, col, step);
        grid[row][col] = '.'; //回溯
        step--;
    }
}
int _tmain(int argc, _TCHAR* argv[])
{
    char grid[9][9] = {0};
    int step = 0;
    shortestPathNumber(grid, 8, 0, step); //从A点开始搜索
    cout<<"A点到B点的最短路径条数为: "<<g_num<<endl;
    return 0;
}

2、定义字符串的左旋转操作，把字符串前面的若干个字符移动到字符串的尾部。

要求：对长度为n的字符串操作的时间复杂度为O(n)，辅助内存为O(1)。
举例：把字符串abcdef左旋转2位得到字符串cdefab。

#include "stdafx.h"
#include <iostream>
using namespace std;
void swap(char *str, int begin, int end)
{
    char ch;
    while (begin < end)
    {
        ch = *(str + begin);
        *(str + begin) = *(str + end);
        *(str + end) = ch;
        begin++;
        end--;
    }
}

void Rotate(char *str, int length ,int m)
{
    if (NULL == str || length == 1)
    {
        return;
    }
    swap(str, 0, m - 1);
    swap(str, m, length - 1);
    swap(str, 0, length - 1);
}
int _tmain(int argc, _TCHAR* argv[])
{
    char chArr[] = "abcdef";
    char *p = chArr;
    cout<<p<<endl;
    Rotate(p, strlen(chArr), 2);
    cout<<p<<endl;
    return 0;
}

3、输入一个单向链表，将该单链表逆置。

举例：原来链表为1->2->3->4->5翻转为5->4->3->2->1

struct ListNode
{
    int m_nKey;
    ListNode* m_pNext;
};

#include "stdafx.h"
#include <iostream>
#include <fstream>
using namespace std;

struct ListNode
{
    int m_nKey;
    ListNode* m_pNext;
};

//构造链表
void CreateList(ListNode *&pHead)
{
    fstream fin("list.txt");
    ListNode *pNode = NULL;
    ListNode *pTmp = NULL;
    int data;
    fin>>data;
    while (data)
    {
        pNode = new ListNode;
        pNode->m_nKey = data;
        pNode->m_pNext = NULL;
        if (NULL == pHead)
        {
            pHead = pNode;
            pTmp = pNode;
        }
        else
        {
            pTmp->m_pNext = pNode;
            pTmp = pNode;
        }

        fin>>data;
    }
}

//翻转单链表
void ReverseLink(ListNode *&pHead)
{
    if (NULL == pHead)
    {
        return;
    }
    ListNode *pNode = pHead;
    ListNode *Prev = NULL;
    ListNode *pNext = NULL;
    while (NULL != pNode)
    {
        pNext = pNode->m_pNext;
        if (NULL == pNext)
        {
            pHead = pNode;
        }
        pNode->m_pNext = Prev;
        Prev = pNode;
        pNode = pNext;
    }
}

void PrintList(ListNode *pHead)
{
    if (NULL == pHead)
    {
        return;
    }
    ListNode *pNode = pHead;
    while (NULL != pNode)
    {
        cout<<pNode->m_nKey<<"  ";
        pNode = pNode->m_pNext;
    }
    cout<<endl;
}

int _tmain(int argc, _TCHAR* argv[])
{
    ListNode *pHead = NULL;
    cout<<"原来的链表：";
    CreateList(pHead);
    PrintList(pHead);
    ReverseLink(pHead);
    cout<<"翻转的链表：";
    PrintList(pHead);

    return 0;
}

4、海量数据提取：分治法

1）、分治法：

例：从1亿个ip中找出访问次数最多的IP

处理海量数据问题存在的原因就在于1）数据量太大，无法在短时间内解决；2）内存不够，没办法装下那么多的数据。
    而对应的办法其实也就是分成1）针对时间，合适的算法+合适的数据结构来提高处理效率；2）针对空间，就是分而治之，将大数据量拆分成多个比较小的数据片，然后对其各个数据片进行处理，最后再处理各个数据片的结果。

a、首先，生成1亿条数据，为了产生更多的重复ip，前面两节就不变了，只随机生成后面的2节。

private static String generateIp() {
    return "192.168." + (int) (Math.random() * 255) + "." + (int) (Math.random() * 255) + "\n";
}
private static void generateIpsFile() {
    File file = new File(FILE_NAME);
    try {
        FileWriter fileWriter = new FileWriter(file);
        for (int i = 0; i < MAX_NUM; i++) {
            fileWriter.write(generateIp());
        }
        fileWriter.close();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

1个char是一个Byte，每个ip大概是15Btye，所以生成的ip文件，大概是1,500,000,000Byte = 1,500,000 KB

b、文件生成了，那么我们现在就要假设内存不是很够，没有办法一次性装入那么多的数据，所以要先把文件给拆分成多个小文件。

在这里采取的是就是Hash取模的方式，将字符串的ip地址给转换成一个长整数，并将这个数对1000取模，将模一样的ip放到同一个文件，这样就能够生成1000个小文件，每个文件就只有1M多，在这里已经是足够小的了。

首先是hash跟取模函数：

private static String hash(String ip) {
    long numIp = ipToLong(ip);
    return String.valueOf(numIp % HASH_NUM);
}

private static long ipToLong(String strIp) {
    long[] ip = new long[4];
    int position1 = strIp.indexOf(".");
    int position2 = strIp.indexOf(".", position1 + 1);
    int position3 = strIp.indexOf(".", position2 + 1);

    ip[0] = Long.parseLong(strIp.substring(0, position1));
    ip[1] = Long.parseLong(strIp.substring(position1 + 1, position2));
    ip[2] = Long.parseLong(strIp.substring(position2 + 1, position3));
    ip[3] = Long.parseLong(strIp.substring(position3 + 1));
    return (ip[0] << 24) + (ip[1] << 16) + (ip[2] << 8) + ip[3];
}

下面是拆文件的函数：


private static void divideIpsFile() {
    File file = new File(FILE_NAME);
    Map<String, StringBuilder> map = new HashMap<String, StringBuilder>();
    int count = 0;
    try {
        FileReader fileReader = new FileReader(file);
        BufferedReader br = new BufferedReader(fileReader);
        String ip;
        while ((ip = br.readLine()) != null) {
            String hashIp = hash(ip);
            if (map.containsKey(hashIp)) {
                StringBuilder sb = (StringBuilder) map.get(hashIp);
                sb.append(ip).append("\n");
                map.put(hashIp, sb);
            } else {
                StringBuilder sb = new StringBuilder(ip);
                sb.append("\n");
                map.put(hashIp, sb);
            }
            count++;
            if (count == 4000000) {
                Iterator<String> it = map.keySet().iterator();
                while (it.hasNext()) {
                    String fileName = it.next();
                    File ipFile = new File(FOLDER + "/" + fileName + ".txt");
                    FileWriter fileWriter = new FileWriter(ipFile, true);
                    StringBuilder sb = map.get(fileName);
                    fileWriter.write(sb.toString());
                    ;
                    fileWriter.close();
                }
                count = 0;
                map.clear();
            }
        }
        br.close();
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (Exception e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

在这里，我们如果每读取一个ip，经过hash映射之后，就直接打开文件，将其加到对应的文件末尾，那么有1亿条ip，我们就要读写文件1亿次，那IO开销的时候就相当大，所以我们可以先拿一个Map放着，等到一定的规模之后，再统一写进文件，然后把map清空，继续映射，这样的话，就能够提高折分的速度。而这个规模，就是根据能处理的内存来取的值的，如果内存够大，这个值就可以设置大点，如果内存小，就要设置小一点的值，IO开销跟内存大小，总是需要在这两者之间的取个平衡点的。
而这种映射可以保证同样的IP会映射到相同的文件中，这样后面在统计IP的时候，就可以保证在a文件中不是最多次数的ip（即使是第2多），也不会出现在其它的文件中。

c、文件拆分了之后，接下来我们就要分别读取这1000个小文件，统计其中每个IP出现的次数。

private static void calculate() {
    File folder = new File(FOLDER);
    File[] files = folder.listFiles();
    FileReader fileReader;
    BufferedReader br;
    for (File file : files) {
        try {
            fileReader = new FileReader(file);
            br = new BufferedReader(fileReader);
            String ip;
            Map<String, Integer> tmpMap = new HashMap<String, Integer>();
            while ((ip = br.readLine()) != null) {
                if (tmpMap.containsKey(ip)) {
                    int count = tmpMap.get(ip);
                    tmpMap.put(ip, count + 1);
                } else {
                    tmpMap.put(ip, 0);
                }
            }
            fileReader.close();
            br.close();
            count(tmpMap, map);
            tmpMap.clear();
        } catch (FileNotFoundException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    count(map, finalMap);
    Iterator<String> it = finalMap.keySet().iterator();
    while (it.hasNext()) {
        String ip = it.next();
        System.out.println("result IP : " + ip + " | count = " + finalMap.get(ip));
    }

}

private static void count(Map<String, Integer> pMap, Map<String, Integer> resultMap) {
    Iterator<Entry<String, Integer>> it = pMap.entrySet().iterator();
    int max = 0;
    String resultIp = "";
    while (it.hasNext()) {
        Entry<String, Integer> entry = (Entry<String, Integer>) it.next();
        if (entry.getValue() > max) {
            max = entry.getValue();
            resultIp = entry.getKey();
        }
    }
    resultMap.put(resultIp, max);
}

第一步要读取每个文件，将其中的ip放到一个Map中，然后调用count()方法，找出map中最大访问次数的ip，将ip和最多访问次数存到另外一个map中。
当1000个文件都读取完之后，我们就会产生一个有1000条记录的map，里面存储了每个文件中访问次数最多的ip，我们再调用count()方法，找出这个map中访问次数最大的ip，即这1000个文件中，哪个文件中的最高访问量的IP，才是真正最高的，好像小组赛到决赛一样。。。。
在这里没有用到什么堆排序和快速排序，因为只需要一个最大值，所以只要拿当前的最大值跟接下来的值判断就好，其实也相当跟只有一个元素的堆的堆顶元素比较。

4、排序算法

例：对一个数组进行nums排序，已知数组长度count。使用多种排序方式。

1）、快速排序

快速排序由于排序效率在同为O(N*logN)的几种排序方法中效率较高，因此经常被采用，再加上快速排序思想----分治法也确实实用。
该方法的基本思想是：
1．先从数列中取出一个数作为基准数。
2．分区过程，将比这个数大的数全放到它的右边，小于或等于它的数全放到它的左边。
3．再对左右区间重复第二步，直到各区间只有一个数。

//快速排序
void quick_sort(int nums[], int l, int r)
{
    if (l < r)
    {
        //Swap(nums[l], nums[(l + r) / 2]); //将中间的这个数和第一个数交换 参见注1
        int i = l, j = r, x = nums[l];
        while (i < j)
        {
            while(i < j && nums[j] >= x) // 从右向左找第一个小于x的数
                j--;
            if(i < j)
                nums[i++] = nums[j];

            while(i < j && nums[i] < x) // 从左向右找第一个大于等于x的数
                i++;
            if(i < j)
                nums[j--] = nums[i];
        }
        nums[i] = x;
        quick_sort(nums, l, i - 1); // 递归调用 
        quick_sort(nums, i + 1, r);
    }
}

我的工作经验分享

算法

1、给定一个8*8的方格子，A点到B点的最短路径有多少条？用算法实现。（回溯法）

2、定义字符串的左旋转操作，把字符串前面的若干个字符移动到字符串的尾部。

3、输入一个单向链表，将该单链表逆置。

4、海量数据提取：分治法

1）、分治法：

a、首先，生成1亿条数据，为了产生更多的重复ip，前面两节就不变了，只随机生成后面的2节。

b、文件生成了，那么我们现在就要假设内存不是很够，没有办法一次性装入那么多的数据，所以要先把文件给拆分成多个小文件。

c、文件拆分了之后，接下来我们就要分别读取这1000个小文件，统计其中每个IP出现的次数。

4、排序算法

1）、快速排序

2）、堆排序

3）、二分排序

添加新评论

最新文章

最近回复

分类

标签

归档

其它