| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 1024 人关注过本帖
标题:下载了别人的C语言写的代码,看上去费劲,求指导,重点是CHUNK页面
取消只看楼主 加入收藏
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
结帖率:66.67%
收藏
已结贴  问题点数:20 回复次数:2 
下载了别人的C语言写的代码,看上去费劲,求指导,重点是CHUNK页面
会的大神,不要推辞帮忙看看吧
metric_PAS_64.zip (224.03 KB)
搜索更多相关主题的帖子: C语言 
2016-08-02 15:47
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
收藏
得分:0 
回复 2楼 linlulu001
C代码
/*
 * Hash one finished chunk with md5(), convert the digest with
 * md5_2_feature() and append the resulting feature to `features`.
 */
static void cdc_push_chunk_feature(vector *features, unsigned char *chunk, unsigned int chunk_len)
{
    unsigned char md5_checksum[32 + 1] = {0};
    feature_t f;

    md5(chunk, chunk_len, md5_checksum);
    f = md5_2_feature(md5_checksum);
    VEC_PUSH_BACK(features, &f);
}

/*
 * cdc chunk
 *
 * Reads `fd` until read() reports end of file, cuts the byte stream into
 * content-defined chunks and appends one feature per chunk to `features`.
 *
 * A chunk boundary is declared when the hash of the current
 * BLOCK_WIN_SIZE-byte window satisfies (hkey % g_block_size) == CHUNK_CDC_R
 * ("normal" chunk), or when the chunk grows to BLOCK_MAX_SIZE bytes without
 * such a hit ("abnormal" chunk). Whatever is left at the end is emitted as a
 * final chunk.
 *
 * Returns 0 on success and -1 if read() fails or the trailing data does not
 * fit the staging buffer; features appended before the failure stay in
 * `features`.
 */
int file_chunk_cdc(int fd, vector* features) {
    unsigned char buf[BUF_MAX_SIZE] = {0};
    unsigned char block_buf[BLOCK_MAX_SIZE * 2] = {0};
    unsigned char last_block_buf[BLOCK_MAX_SIZE * 2] = {0};
    char win_buf[BLOCK_WIN_SIZE + 1] = {0};
    unsigned char adler_pre_char = 0;   /* byte that most recently left the hash window */
    unsigned int bpos = 0;              /* unconsumed bytes carried over at the front of buf */
    unsigned int rwsize = 0;
    unsigned int exp_rwsize = BUF_MAX_SIZE;
    unsigned int head, tail;
    unsigned int block_sz = 0, old_block_sz = 0;
    unsigned int hkey = 0;
    unsigned int pending = 0, last_block_len = 0;
    long nread = 0;

    for (;;)
    {
        /*
         * Keep the result signed until it has been checked: storing -1 in an
         * unsigned variable would look like an enormous successful read.
         */
        nread = (long)read(fd, buf + bpos, exp_rwsize);
        if (nread < 0)
        {
            return -1;
        }
        rwsize = (unsigned int)nread;
        if (rwsize == 0)
        {
            break;  /* end of file */
        }

        /*
         * last chunk
         * NOTE(review): a short read from a pipe or socket would also end up
         * here and stop the scan early -- confirm fd is always a regular file.
         */
        if ((rwsize + bpos + block_sz) < BLOCK_MIN_SIZE)
        {
            break;
        }

        head = 0;
        tail = bpos + rwsize;
        /* avoid unnecessary computation and comparison */
        if (block_sz < (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE))
        {
            old_block_sz = block_sz;
            block_sz = ((block_sz + tail - head) > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : block_sz + tail - head;
            memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
            head += (block_sz - old_block_sz);
        }

        while ((head + BLOCK_WIN_SIZE) <= tail)
        {
            memcpy(win_buf, buf + head, BLOCK_WIN_SIZE);
            /*
             * Firstly, I thought rabinhash was the best. However, its performance is very bad.
             * After some testing, I found ELF_hash is better both on performance and dedup rate.
             * So, ELF_hash was the default. Now, adler_hash is the default.
             */
            if (g_rolling_hash)
            {
                /*
                 * A chunk that has just been primed to MIN - WIN bytes needs
                 * a full checksum of the window; afterwards the hash is
                 * rolled forward one byte at a time.
                 */
                hkey = (block_sz == (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ? adler32_checksum(win_buf, BLOCK_WIN_SIZE) :
                    adler32_rolling_checksum(hkey, BLOCK_WIN_SIZE, adler_pre_char, buf[head+BLOCK_WIN_SIZE-1]);
            }
            else
            {
                hkey = g_cdc_chunk_hashfunc(win_buf);
            }

            /* get a normal chunk */
            if ((hkey % g_block_size) == CHUNK_CDC_R)
            {
                memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SIZE);
                head += BLOCK_WIN_SIZE;
                block_sz += BLOCK_WIN_SIZE;
                if (block_sz >= BLOCK_MIN_SIZE)
                {
                    cdc_push_chunk_feature(features, block_buf, block_sz);
                    block_sz = 0;
                }
            }
            else
            {
                block_buf[block_sz++] = buf[head++];
                /* get an abnormal chunk */
                if (block_sz >= BLOCK_MAX_SIZE)
                {
                    cdc_push_chunk_feature(features, block_buf, block_sz);
                    block_sz = 0;
                }
            }

            /* avoid unnecessary computation and comparison */
            if (block_sz == 0)
            {
                unsigned int avail = tail - head;

                block_sz = (avail > (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE)) ?
                    BLOCK_MIN_SIZE - BLOCK_WIN_SIZE : avail;
                memcpy(block_buf, buf + head, block_sz);
                head += block_sz;
            }

            /* head has advanced by at least one byte in this iteration */
            adler_pre_char = buf[head - 1];
        }

        /* read expected data from file to fill up buf */
        bpos = tail - head;
        exp_rwsize = BUF_MAX_SIZE - bpos;
        /*
         * When nothing was consumed from this buffer (head == 0) the byte
         * that left the window is still the one remembered from the previous
         * pass; buf[head - 1] would index before the start of buf.
         */
        if (head > 0)
        {
            adler_pre_char = buf[head - 1];
        }
        memmove(buf, buf + head, bpos);
    }

    /* last chunk: the partially built chunk plus the bytes still waiting in buf */
    pending = rwsize + bpos;
    last_block_len = block_sz + pending;
    if (last_block_len > sizeof last_block_buf)
    {
        return -1;
    }
    if (last_block_len > 0)
    {
        memcpy(last_block_buf, block_buf, block_sz);
        memcpy(last_block_buf + block_sz, buf, pending);
        cdc_push_chunk_feature(features, last_block_buf, last_block_len);
    }

    return 0;
}

改写的java代码
package cn.edu.cust.deduple;

import
import
import
import
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import cn.edu.cust.deduple.utils.Checksum;
import cn.edu.cust.deduple.utils.MapUtils;
import cn.edu.cust.deduple.utils.Md5Util;

public class CDC {

    private static Map<String, Long> fingerPrints = new HashMap<String, Long>();
   
    static final int BUF_MAX_SZ = 128 * 1024;
    static final int BLOCK_MAX_SZ = 4096;
    static final int BLOCK_WIN_SZ = 32;
    static final int BLOCK_MIN_SZ = 64;
   
    static final int BLOCK_SZ = 4096;
    static final int CHUNK_CDC_R = 13;
   
    FileChunks fileChunk(File f) throws IOException {
        byte buf[] = new byte[BUF_MAX_SZ];
        byte block_buf[] = new byte[BLOCK_MAX_SZ + BLOCK_WIN_SZ];
        byte win_buf[] = new byte[BLOCK_WIN_SZ + 1];
        byte adler_pre_char = 0;
        //unsigned char md5_checksum[16 + 1] = {0};
        //unsigned char csum[10 + 1] = {0};
        int bpos = 0;
        int rwsize = 0;
        int exp_rwsize = BUF_MAX_SZ;
        int head, tail;
        int block_sz = 0, old_block_sz = 0;
        long hkey = 0;
        //chunk_block_entry chunk_bentry;
        long offset = 0;

        FileInputStream fin = new FileInputStream(f);
        FileChannel fc = fin.getChannel();
        ByteBuffer bb = ByteBuffer.wrap(buf, bpos, exp_rwsize);
        FileChunks fcs = new FileChunks();
        fcs.fileName = f.getName();
        
        while((rwsize = fc.read(bb)) >= 0) {
            /* last chunk */
            //System.out.println("rwsize:" + rwsize);
            if ((rwsize + bpos + block_sz) < BLOCK_MIN_SZ)
                break;

            head = 0;
            tail = bpos + rwsize;
            /* avoid unnecessary computation and comparsion */
            if (block_sz < (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) {
                old_block_sz = block_sz;
                block_sz = ((block_sz + tail - head) > (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ?
                    BLOCK_MIN_SZ - BLOCK_WIN_SZ : block_sz + tail -head;
                System.arraycopy(buf, head, block_buf, old_block_sz, block_sz - old_block_sz);
                //memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
                head += (block_sz - old_block_sz);
            }

            while ((head + BLOCK_WIN_SZ) <= tail) {
                System.arraycopy(buf, head, win_buf, 0, BLOCK_WIN_SZ);
                //memcpy(win_buf, buf + head, BLOCK_WIN_SZ);
                hkey = (block_sz == (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ? Checksum.adler32_checksum(win_buf, BLOCK_WIN_SZ) :
                    Checksum.adler32_rolling_checksum((int)hkey, BLOCK_WIN_SZ, adler_pre_char, buf[head+BLOCK_WIN_SZ-1]);
                //System.out.println("hkey:" + (hkey % BLOCK_SZ));
                /* get a normal chunk, write block info to chunk file */
                if ((hkey % BLOCK_SZ) == CHUNK_CDC_R) {
                    //System.out.println(block_sz + BLOCK_WIN_SZ);
                    System.arraycopy(buf, head, block_buf, block_sz, BLOCK_WIN_SZ);
                    //memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SZ);
                    head += BLOCK_WIN_SZ;
                    block_sz += BLOCK_WIN_SZ;
                    if(block_sz > BLOCK_MAX_SZ){
                        //System.out.println(">4096:" + block_sz);
                    }
                    if (block_sz >= BLOCK_MIN_SZ) {
                        fcs.addChunk(offset, block_sz, block_buf);
                        /*md5(block_buf, block_sz, md5_checksum);
                        uint_2_str(adler32_checksum(block_buf, block_sz), csum);
                        chunk_file_hdr->block_nr++;
                        chunk_bentry.len = block_sz;
                        chunk_bentry.offset = offset;
                        memcpy(chunk_bentry.md5, md5_checksum, 16 + 1);
                        memcpy(chunk_bentry.csum, csum, 10 + 1);
                        rwsize = write(fd_chunk, &chunk_bentry, CHUNK_BLOCK_ENTRY_SZ);
                        if (rwsize == -1 || rwsize != CHUNK_BLOCK_ENTRY_SZ)
                            return -1;*/
                        offset += block_sz;
                        block_sz = 0;
                    }
                } else {
                    block_buf[block_sz++] = buf[head++];
                    /* get an abnormal chunk, write block info to chunk file */
                    if (block_sz >= BLOCK_MAX_SZ) {
                        fcs.addChunk(offset, block_sz, block_buf);
                        /*md5(block_buf, block_sz, md5_checksum);
                        uint_2_str(adler32_checksum(block_buf, block_sz), csum);
                        chunk_file_hdr->block_nr++;
                        chunk_bentry.len = block_sz;
                        chunk_bentry.offset = offset;
                        memcpy(chunk_bentry.md5, md5_checksum, 16+1);
                        memcpy(chunk_bentry.csum, csum, 10 + 1);
                        rwsize = write(fd_chunk, &chunk_bentry, CHUNK_BLOCK_ENTRY_SZ);
                        if (rwsize == -1 || rwsize != CHUNK_BLOCK_ENTRY_SZ)
                            return -1;*/
                        offset += block_sz;
                        block_sz = 0;
                    }
                }

                /* avoid unnecessary computation and comparsion */
                if (block_sz == 0) {
                    block_sz = ((tail - head) > (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ?
                        BLOCK_MIN_SZ - BLOCK_WIN_SZ : tail - head;
                    System.arraycopy(buf, head, block_buf, 0, block_sz);
                    //memcpy(block_buf, buf + head, block_sz);
                    head = ((tail - head) > (BLOCK_MIN_SZ - BLOCK_WIN_SZ)) ?
                        head + (BLOCK_MIN_SZ - BLOCK_WIN_SZ) : tail;
                }
                adler_pre_char = buf[head - 1];
            }

            /* read expected data from file to full up buf */
            bpos = tail - head;
            exp_rwsize = BUF_MAX_SZ - bpos;
            adler_pre_char = buf[head - 1];
            System.arraycopy(buf, head, buf, 0, bpos);
            //memmove(buf, buf + head, bpos);
            bb = ByteBuffer.wrap(buf, bpos, exp_rwsize);
        }

        fin.close();
        return fcs;
        /*if (rwsize == -1)
            return -1;

        return 0;*/
    }
}

2016-08-04 09:37
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
收藏
得分:0 
我就是仿照这段C写的java,有什么问题吗?
2016-08-04 09:37
快速回复:下载了别人的C语言写的代码,看上去费劲,求指导,重点是CHUNK页面
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.049158 second(s), 10 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved