16、Redis 源码解析 - Redis输入输出的抽象(rio)

Redis 输入输出的抽象(rio)

1. 概述

rio是Redis对IO操作的一个抽象,可以面向不同的输入输出设备,例如一个缓冲区IO、文件IO和socket IO。

一个rio对象提供一下四个方法:

  • read:读操作
  • write:写操作
  • tell:读写的偏移量
  • flush:冲洗缓冲区操作

使用C语言,实现面向对象的思想。

2. rio对象的抽象

 struct _rio {
    /* Backend functions.
     * Since this functions do not tolerate short writes or reads the return
     * value is simplified to: zero on error, non zero on complete success. */
    // 读,写,读写偏移量、冲洗操作的函数指针,非0表示成功
    size_t (*read)(struct _rio *, void *buf, size_t len);
    size_t (*write)(struct _rio *, const void *buf, size_t len);
    off_t (*tell)(struct _rio *);
    int (*flush)(struct _rio *);
    /* The update_cksum method if not NULL is used to compute the checksum of
     * all the data that was read or written so far. The method should be
     * designed so that can be called with the current checksum, and the buf
     * and len fields pointing to the new block of data to add to the checksum
     * computation. */
    // 计算和校验函数
    void (*update_cksum)(struct _rio *, const void *buf, size_t len);

    /* The current checksum */
    // 当前校验和
    uint64_t cksum;

    /* number of bytes read or written */
    // 读或写的字节数
    size_t processed_bytes;

    /* maximum single read or write chunk size */
    // 每次读或写的最大字节数
    size_t max_processing_chunk;

    /* Backend-specific vars. */
    // 读写的各种对象
    union {
        /*内存缓冲区 In-memory buffer target. */
        struct {
            sds ptr;    //缓冲区的指针,本质是char *
            off_t pos;  //缓冲区的偏移量
        } buffer;

        /*标准文件IO Stdio file pointer target. */
        struct {
            FILE *fp;       // 文件指针,指向被打开的文件
            off_t buffered; /* 最近一次同步之后所写的字节数 Bytes written since last fsync. */
            off_t autosync; /* 写入设置的autosync字节后,会执行fsync()同步 fsync after 'autosync' bytes written. */
        } file;

        /*文件描述符 Multiple FDs target (used to write to N sockets). */
        struct {
            int *fds;       /*文件描述符数组 File descriptors. */
            int *state;     /*每一个fd所对应的errno  Error state of each fd. 0 (if ok) or errno. */
            int numfds;     // 数组长度,文件描述符个数
            off_t pos;      // 偏移量
            sds buf;        // 缓冲区
        } fdset;
    } io;
};

typedef struct _rio rio;

下面的函数是流对象的接口。

2.1 读操作

 static inline size_t rioRead(rio *r, void *buf, size_t len) {
    while (len) {
        // 读的字节长度,不能超过每次读或写的最大字节数max_processing_chunk
        size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
        // 调用自身的read方法读到buf中
        if (r->read(r,buf,bytes_to_read) == 0)
            return 0;
        // 更新和校验
        if (r->update_cksum) r->update_cksum(r,buf,bytes_to_read);
        // 更新偏移量,指向下一个读的位置
        buf = (char*)buf + bytes_to_read;
        // 计算剩余要读的长度
        len -= bytes_to_read;
        // 更新读或写的字节数
        r->processed_bytes += bytes_to_read;
    }
    return 1;
}

2.2 写操作

 static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
    while (len) {
        // 写的字节长度,不能超过每次读或写的最大字节数max_processing_chunk
        size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
        // 更新和校验
        if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
        // 调用自身的write方法写入
        if (r->write(r,buf,bytes_to_write) == 0)
            return 0;
        // 更新偏移量,指向下一个写的位置
        buf = (char*)buf + bytes_to_writ;
        // 计算剩余写入的长度
        len -= bytes_to_write;
        // 更新读或写的字节数
        r->processed_bytes += bytes_to_write;
    }
    return 1;
}

2.3 返回当前偏移量

 static inline off_t rioTell(rio *r) {
    return r->tell(r);
}

2.4 缓冲区冲洗函数

 static inline int rioFlush(rio *r) {
    return r->flush(r);
}

3. rio对象的实现

对rio抽象的结构体中,使用了一个共用体(union),它可能是三种不同的对象,分别是:

  • 缓冲区 IO(Buffer I/O)
  • 标准输入输出 IO(Stdio file pointer)
  • 文件描述符集合(File descriptors set)

下面给出所有对象的实现:rio.c 和 rio.h 文件详细注释

3.1 缓冲区IO

 /* ------------------------- Buffer I/O implementation ----------------------- */
// 缓冲区IO实现

/* Returns 1 or 0 for success/failure. */
// 将len长的buf写到一个缓冲区对象r中
static size_t rioBufferWrite(rio *r, const void *buf, size_t len) {
    r->io.buffer.ptr = sdscatlen(r->io.buffer.ptr,(char*)buf,len);  //追加操作
    r->io.buffer.pos += len;    //更新偏移量
    return 1;
}

/* Returns 1 or 0 for success/failure. */
// 讲缓冲区对象r读到buf中,读len长
static size_t rioBufferRead(rio *r, void *buf, size_t len) {
    // 缓冲区对象的长度小于len,不够读,返回0
    if (sdslen(r->io.buffer.ptr)-r->io.buffer.pos < len)
        return 0; /* not enough buffer to return len bytes. */
    // 读到buf中
    memcpy(buf,r->io.buffer.ptr+r->io.buffer.pos,len);
    // 更新偏移量
    r->io.buffer.pos += len;
    return 1;
}

/* Returns read/write position in buffer. */
// 返回缓冲区对象r当前的偏移量
static off_t rioBufferTell(rio *r) {
    return r->io.buffer.pos;
}

/* Flushes any buffer to target device if applicable. Returns 1 on success
 * and 0 on failures. */
// 清洗缓冲区
static int rioBufferFlush(rio *r) {
    UNUSED(r);  //void r,强转成void类型对象,缓冲区就相当于释放
    return 1; /* Nothing to do, our write just appends to the buffer. */
}

// 定义一个缓冲区对象并初始化方法和成员
static const rio rioBufferIO = {
    rioBufferRead,
    rioBufferWrite,
    rioBufferTell,
    rioBufferFlush,
    NULL,           /* update_checksum */
    0,              /* current checksum */
    0,              /* bytes read or written */
    0,              /* read/write chunk size */
    { { NULL, 0 } } /* union for io-specific vars */
};

// 初始化缓冲区对象r并设置缓冲区的地址
void rioInitWithBuffer(rio *r, sds s) {
    *r = rioBufferIO;
    r->io.buffer.ptr = s;
    r->io.buffer.pos = 0;
}

3.2 标准输入输出 IO

 /* --------------------- Stdio file pointer implementation ------------------- */
// 标准文件IO实现
/* Returns 1 or 0 for success/failure. */
// 将len长的buf写入一个文件流对象
static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
    size_t retval;

    // 调用底层库函数
    retval = fwrite(buf,len,1,r->io.file.fp);
    // 更新写入的长度
    r->io.file.buffered += len;

    // 如果已经达到自动的同步autosync所设置的字节数
    if (r->io.file.autosync &&
        r->io.file.buffered >= r->io.file.autosync)
    {
        // 冲洗键盘缓冲区中的数据到文件中
        fflush(r->io.file.fp);
        // 同步操作
        aof_fsync(fileno(r->io.file.fp));
        // 长度初始化为0
        r->io.file.buffered = 0;
    }
    return retval;
}

/* Returns 1 or 0 for success/failure. */
// 从文件流对象r中读出len长度的字节到buf中
static size_t rioFileRead(rio *r, void *buf, size_t len) {
    return fread(buf,len,1,r->io.file.fp);
}

/* Returns read/write position in file. */
// 返回文件流对象的偏移量
static off_t rioFileTell(rio *r) {
    return ftello(r->io.file.fp);
}

/* Flushes any buffer to target device if applicable. Returns 1 on success
 * and 0 on failures. */
// 清洗文件流
static int rioFileFlush(rio *r) {
    return (fflush(r->io.file.fp) == 0) ? 1 : 0;
}

// 初始化一个文件流对象
static const rio rioFileIO = {
    rioFileRead,
    rioFileWrite,
    rioFileTell,
    rioFileFlush,
    NULL,           /* update_checksum */
    0,              /* current checksum */
    0,              /* bytes read or written */
    0,              /* read/write chunk size */
    { { NULL, 0 } } /* union for io-specific vars */
};

// 初始化一个文件流对象且设置对应文件
void rioInitWithFile(rio *r, FILE *fp) {
    *r = rioFileIO;
    r->io.file.fp = fp;
    r->io.file.buffered = 0;
    r->io.file.autosync = 0;
}

3.3 文件描述符集合

 /* ------------------- File descriptors set implementation ------------------- */
// 文件描述符合集合实现
/* Returns 1 or 0 for success/failure.
 * The function returns success as long as we are able to correctly write
 * to at least one file descriptor.
 *
 * When buf is NULL and len is 0, the function performs a flush operation
 * if there is some pending buffer, so this function is also used in order
 * to implement rioFdsetFlush(). */
// 将buf写入文件描述符集合对象
static size_t rioFdsetWrite(rio *r, const void *buf, size_t len) {
    ssize_t retval;
    int j;
    unsigned char *p = (unsigned char*) buf;
    int doflush = (buf == NULL && len == 0);    //如果buf为空且len为0,相当于flush操作

    /* To start we always append to our buffer. If it gets larger than
     * a given size, we actually write to the sockets. */
    // 将buf中的内容写到文件描述符集合对象的缓冲区中
    if (len) {
        r->io.fdset.buf = sdscatlen(r->io.fdset.buf,buf,len);
        // 设置写完的标志
        len = 0; /* Prevent entering the while below if we don't flush. */
        if (sdslen(r->io.fdset.buf) > PROTO_IOBUF_LEN) doflush = 1; //如果缓冲区太大需要冲刷到socket中
    }

    // 冲洗文件描述符集合对象,设置集合缓冲区长度和集合缓冲区地址
    if (doflush) {
        p = (unsigned char*) r->io.fdset.buf;
        len = sdslen(r->io.fdset.buf);
    }

    /* Write in little chunchs so that when there are big writes we
     * parallelize while the kernel is sending data in background to
     * the TCP socket. */
    // 一次可能无法冲洗完,需要循环多次
    while(len) {
        // 一次最多冲洗1M字节
        size_t count = len < 1024 ? len : 1024;
        int broken = 0;
        for (j = 0; j < r->io.fdset.numfds; j++) {
            // errno为0表示ok,记录不为0的文件描述符个数
            if (r->io.fdset.state[j] != 0) {
                /* Skip FDs alraedy in error. */
                broken++;
                continue;
            }

            /* Make sure to write 'count' bytes to the socket regardless
             * of short writes. */
            size_t nwritten = 0;
            // 新写的数据一次或多次写够count个字节往第一个文件描述符fd
            while(nwritten != count) {
                retval = write(r->io.fdset.fds[j],p+nwritten,count-nwritten);
                // 写失败,判断是不是写阻塞,是则设置超时
                if (retval <= 0) {
                    /* With blocking sockets, which is the sole user of this
                     * rio target, EWOULDBLOCK is returned only because of
                     * the SO_SNDTIMEO socket option, so we translate the error
                     * into one more recognizable by the user. */
                    if (retval == -1 && errno == EWOULDBLOCK) errno = ETIMEDOUT;
                    break;
                }
                nwritten += retval; //每次加上写成功的字节数
            }

            // 如果刚才写失败的情况,则将当前的文件描述符状态设置为错误的标记码
            if (nwritten != count) {
                /* Mark this FD as broken. */
                r->io.fdset.state[j] = errno;
                if (r->io.fdset.state[j] == 0) r->io.fdset.state[j] = EIO;
            }
        }
        // 所有的文件描述符都出错返回0
        if (broken == r->io.fdset.numfds) return 0; /* All the FDs in error. */
        // 更新下次要写入的地址和长度
        p += count;
        len -= count;
        r->io.fdset.pos += count;   //已写入的偏移量
    }

    if (doflush) sdsclear(r->io.fdset.buf); //释放集合缓冲区
    return 1;
}

/* Returns 1 or 0 for success/failure. */
// 文件描述符集合对象不支持读,直接返回0
static size_t rioFdsetRead(rio *r, void *buf, size_t len) {
    UNUSED(r);
    UNUSED(buf);
    UNUSED(len);
    return 0; /* Error, this target does not support reading. */
}

/* Returns read/write position in file. */
// 获取偏移量
static off_t rioFdsetTell(rio *r) {
    return r->io.fdset.pos;
}

/* Flushes any buffer to target device if applicable. Returns 1 on success
 * and 0 on failures. */
// 清洗缓冲区的值
static int rioFdsetFlush(rio *r) {
    /* Our flush is implemented by the write method, that recognizes a
     * buffer set to NULL with a count of zero as a flush request. */
    return rioFdsetWrite(r,NULL,0);
}

// 初始化一个文件描述符集合对象
static const rio rioFdsetIO = {
    rioFdsetRead,
    rioFdsetWrite,
    rioFdsetTell,
    rioFdsetFlush,
    NULL,           /* update_checksum */
    0,              /* current checksum */
    0,              /* bytes read or written */
    0,              /* read/write chunk size */
    { { NULL, 0 } } /* union for io-specific vars */
};

// 初始化一个文件描述符集合对象并设置成员变量
void rioInitWithFdset(rio *r, int *fds, int numfds) {
    int j;

    *r = rioFdsetIO;
    r->io.fdset.fds = zmalloc(sizeof(int)*numfds);
    r->io.fdset.state = zmalloc(sizeof(int)*numfds);
    memcpy(r->io.fdset.fds,fds,sizeof(int)*numfds);
    for (j = 0; j < numfds; j++) r->io.fdset.state[j] = 0;
    r->io.fdset.numfds = numfds;
    r->io.fdset.pos = 0;
    r->io.fdset.buf = sdsempty();
}

/* release the rio stream. */
// 释放文件描述符集合流对象
void rioFreeFdset(rio *r) {
    zfree(r->io.fdset.fds);
    zfree(r->io.fdset.state);
    sdsfree(r->io.fdset.buf);
}

3.4 三种对象的共同实现

  • 校验和函数
 /* ---------------------------- Generic functions ---------------------------- */
// 通用函数
/* This function can be installed both in memory and file streams when checksum
 * computation is needed. */
// 根据CRC64算法进行校验和
void rioGenericUpdateChecksum(rio *r, const void *buf, size_t len) {
    r->cksum = crc64(r->cksum,buf,len);
}
  • 设置自动同步的限制字节

/* Set the file-based rio object to auto-fsync every 'bytes' file written.
 * By default this is set to zero that means no automatic file sync is
 * performed.
 *
 * This feature is useful in a few contexts since when we rely on OS write
 * buffers sometimes the OS buffers way too much, resulting in too many
 * disk I/O concentrated in very little time. When we fsync in an explicit
 * way instead the I/O pressure is more distributed across time. */
// 设置自动同步的字节数限制,如果bytes为0,则意味着不执行
void rioSetAutoSync(rio *r, off_t bytes) {
    serverAssert(r->read == rioFileIO.read);    //限制为文件流对象,不对其他对象设置限制
    r->io.file.autosync = bytes;
}

4. 利用rio生成AOF协议

 // 以"*<count>\r\n"格式为写如一个int整型的count
size_t rioWriteBulkCount(rio *r, char prefix, int count) {
    char cbuf[128];
    int clen;

    // 构建一个 "*<count>\r\n"
    cbuf[0] = prefix;
    clen = 1+ll2string(cbuf+1,sizeof(cbuf)-1,count);
    cbuf[clen++] = '\r';
    cbuf[clen++] = '\n';
    // 调用rio的接口,将cbuf写如r中
    if (rioWrite(r,cbuf,clen) == 0) return 0;
    return clen;
}

/* Write binary-safe string in the format: "$<count>\r\n<payload>\r\n". */
// 以"$<count>\r\n<payload>\r\n"为格式写入一个字符串
size_t rioWriteBulkString(rio *r, const char *buf, size_t len) {
    size_t nwritten;

    // 写入"$<len>\r\n"
    if ((nwritten = rioWriteBulkCount(r,'$',len)) == 0) return 0;
    // 追加写入一个buf,也就是<payload>部分
    if (len > 0 && rioWrite(r,buf,len) == 0) return 0;
    // 追加"\r\n"
    if (rioWrite(r,"\r\n",2) == 0) return 0;
    return nwritten+len+2;  //返回长度
}

/* Write a long long value in format: "$<count>\r\n<payload>\r\n". */
// 以"$<count>\r\n<payload>\r\n"为格式写入一个longlong 值
size_t rioWriteBulkLongLong(rio *r, long long l) {
    char lbuf[32];
    unsigned int llen;

    // 将longlong转为字符串,按字符串的格式写入
    llen = ll2string(lbuf,sizeof(lbuf),l);
    return rioWriteBulkString(r,lbuf,llen);
}

/* Write a double value in the format: "$<count>\r\n<payload>\r\n" */
// 以"$<count>\r\n<payload>\r\n"为格式写入一个 double 值
size_t rioWriteBulkDouble(rio *r, double d) {
    char dbuf[128];
    unsigned int dlen;

    //以宽度为17位的方式写到dbuf中,17位的double双精度浮点数的长度最短且无损
    dlen = snprintf(dbuf,sizeof(dbuf),"%.17g",d);
    return rioWriteBulkString(r,dbuf,dlen);
}