当前位置:Linux教程 - Linux - 分析内核对gzip压缩文件进行解压的方法

分析内核对gzip压缩文件进行解压的方法

概述
----
1) Linux的初始内核映象以gzip压缩文件的格式存放在zImage或bzImage之中, 内核的自举
代码将它解压到1M内存开始处. 在内核初始化时, 如果加载了压缩的initrd映象, 内核会将
它解压到内存盘中, 这两处解压过程都使用了lib/inflate.c文件.

2) inflate.c是从gzip源程序中分离出来的, 包含了一些对全局数据的直接引用, 在使用时
需要直接嵌入到代码中. gzip压缩文件时总是在前32K字节的范围内寻找重复的字符串进行
编码, 在解压时需要一个至少为32K字节的解压缓冲区, 它定义为window[WSIZE].
inflate.c使用get_byte()读取输入文件, 它被定义成宏来提高效率. 输入缓冲区指针必须
定义为inptr, inflate.c中对之有减量操作. inflate.c调用flush_window()来输出window
缓冲区中的解压出的字节串, 每次输出长度用outcnt变量表示. 在flush_window()中, 还必
须对输出字节串计算CRC并且刷新crc变量. 在调用gunzip()开始解压之前, 调用makecrc()
初始化CRC计算表. 最后gunzip()返回0表示解压成功.

3) zImage或bzImage由16位引导代码和32位内核自解压映象两个部分组成. 对于zImage, 内
核自解压映象被加载到物理地址0x1000, 内核被直接解压到1M处. 对于bzImage, 内核自解
压映象被加载到1M开始的地方, 内核被解压为两个片段: 低端片段位于物理地址0x2000至0x90000
之间; 高端片段位于自解压映象及其堆之后, 且其起始地址不低于1M加上低端片段的最大长度.
解压完成后, 这两个片段被合并到1M的起始位置.

解压根内存盘映象文件的代码
--------------------------

; drivers/block/rd.c
#ifdef BUILD_CRAMDISK

/*
* gzip declarations
*/

/* Wrapper for function prototypes: OF((args)) expands to (args). */
#define OF(args) args

#ifndef memzero
#define memzero(s, n) memset ((s), 0, (n))
#endif

/* The three integer types used by inflate.c. */
typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;

#define INBUFSIZ 4096 /* size of the input buffer refilled by fill_inbuf() */
#define WSIZE 0x8000 /* window size--must be a power of two, and */
/* at least 32K for zip's deflate method */

static uch *inbuf; /* input buffer; consumed through get_byte() */
static uch *window; /* decompression window */

static unsigned insize; /* valid bytes in inbuf */
static unsigned inptr; /* index of next byte to be processed in inbuf */
static unsigned outcnt; /* bytes in output buffer */
static int exit_code; /* set to 1 by error(); makes fill_inbuf() return -1 */
static long bytes_out; /* total number of bytes decompressed so far */
static struct file *crd_infp, *crd_outfp;

/* Fetch the next input byte, refilling inbuf once it has been used up. */
#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())

/* Diagnostic functions (stubbed out) */
#define Assert(cond,msg)
#define Trace(x)
#define Tracev(x)
#define Tracevv(x)
#define Tracec(c,x)
#define Tracecv(c,x)

#define STATIC static

static int fill_inbuf(void);
static void flush_window(void);
static void *malloc(int size);
static void free(void *where);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);

#include "../../lib/inflate.c"

/*
 * Allocator declared ahead of the inflate.c include: forwards the request
 * to kmalloc() with GFP_KERNEL and returns whatever kmalloc() returns.
 */
static void __init *malloc(int size)
{
    void *mem = kmalloc(size, GFP_KERNEL);

    return mem;
}

/* Counterpart of malloc(): hands the block straight back to kfree(). */
static void __init free(void *where)
{
    void *blk = where;

    kfree(blk);
}

/*
 * Mark hook (presumably called by the included inflate.c -- confirm).
 * Deliberately empty: malloc() in this file is backed by kmalloc(), so
 * there is no private heap position to record in *ptr.
 */
static void __init gzip_mark(void **ptr)
{
}

/*
 * Release hook paired with gzip_mark().  Deliberately empty: gzip_mark()
 * records nothing here, so there is nothing to roll back.
 */
static void __init gzip_release(void **ptr)
{
}


/* ===========================================================================
 * Fill the input buffer. This is called only when the buffer is empty
 * and at least one byte is really needed.
 *
 * Returns the first byte of the freshly read data, or -1 when an error has
 * already been flagged in exit_code, when the read fails, or at end of input.
 */
static int __init fill_inbuf(void)
{
    long got;

    if (exit_code)
        return -1;

    /*
     * Keep the result signed: storing a negative error code directly in the
     * unsigned insize would make it look like a huge amount of valid data.
     */
    got = crd_infp->f_op->read(crd_infp, inbuf, INBUFSIZ,
                               &crd_infp->f_pos);
    if (got <= 0) {
        insize = 0;
        return -1;
    }

    insize = got;
    inptr = 1; /* byte 0 is returned right here */

    return inbuf[0];
}

/* ===========================================================================
 * Write the output window window[0..outcnt-1] and update crc and bytes_out.
 * (Used for the decompressed data only.)
 *
 * A failed or short write is reported through error(), which sets exit_code
 * so that the next fill_inbuf() call returns -1.
 */
static void __init flush_window(void)
{
    ulg c = crc; /* temporary variable */
    unsigned n;
    uch *in, ch;
    long written;

    written = crd_outfp->f_op->write(crd_outfp, window, outcnt,
                                     &crd_outfp->f_pos);
    if (written < 0 || (unsigned long)written != outcnt)
        error("RAMDISK: incomplete write\n");

    in = window;
    for (n = 0; n < outcnt; n++) {
        ch = *in++;
        /* fold this output byte into the running CRC */
        c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
    }
    crc = c;
    bytes_out += (ulg)outcnt; /* running total of decompressed bytes */
    outcnt = 0;
}

static void __init error(char *x) 解压出错调用的函数
{
printk(KERN_ERR "%s", x);
exit_code = 1;
}

/*
 * Decompress the gzip stream read from fp into outfp.
 *
 * Resets the decompressor state, allocates the input buffer and the
 * decompression window, runs gunzip(), and frees both buffers again.
 * Returns the result of gunzip(), or -1 if either allocation fails.
 */
static int __init
crd_load(struct file * fp, struct file *outfp)
{
    int result;

    insize = 0; /* valid bytes in inbuf */
    inptr = 0; /* index of next byte to be processed in inbuf */
    outcnt = 0; /* bytes in output buffer */
    exit_code = 0;
    bytes_out = 0;
    crc = (ulg)0xffffffffL; /* shift register contents */

    crd_infp = fp;
    crd_outfp = outfp;
    inbuf = kmalloc(INBUFSIZ, GFP_KERNEL);
    if (!inbuf) {
        printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n");
        return -1;
    }
    window = kmalloc(WSIZE, GFP_KERNEL);
    if (!window) {
        printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n");
        kfree(inbuf);
        return -1;
    }
    makecrc(); /* build the CRC table before decompressing */
    result = gunzip();
    kfree(inbuf);
    kfree(window);
    return result;
}

#endif /* BUILD_CRAMDISK */


32位内核自解压代码
------------------

; arch/i386/boot/compressed/head.S
.text

#include <linux/linkage.h>
#include <asm/segment.h>

/*
 * Entry point of the 32-bit self-decompressing image: 0x1000 for zImage.
 * NOTE(review): the original annotation gave 0x101000 for bzImage, while the
 * overview says the bzImage decompressor is loaded at 1M -- confirm.
 */
.globl startup_32

startup_32:
    cld
    cli
    movl $(__KERNEL_DS),%eax
    movl %eax,%ds
    movl %eax,%es
    movl %eax,%fs
    movl %eax,%gs

    lss SYMBOL_NAME(stack_start),%esp # stack is the user_stack[] array defined in misc.c
    xorl %eax,%eax
1:  incl %eax # check that A20 really IS enabled
    movl %eax,0x000000 # loop forever if it isn't
    cmpl %eax,0x100000
    je 1b

/*
 * Initialize eflags. Some BIOS's leave bits like NT set. This would
 * confuse the debugger if this code is traced.
 * XXX - best to initialize before switching to protected mode.
 */
    pushl $0
    popfl
/*
 * Clear BSS (the BSS of the decompressor itself)
 */
    xorl %eax,%eax
    movl $ SYMBOL_NAME(_edata),%edi
    movl $ SYMBOL_NAME(_end),%ecx
    subl %edi,%ecx
    cld
    rep
    stosb
/*
 * Do the decompression, and jump to the new kernel..
 */
    subl $16,%esp # place for structure on the stack
    movl %esp,%eax
    pushl %esi # real mode pointer as second arg
    pushl %eax # address of structure as first arg
    call SYMBOL_NAME(decompress_kernel)
    orl %eax,%eax # nonzero: kernel was decompressed into a low and a high fragment
    jnz 3f
    popl %esi # discard address
    popl %esi # real mode pointer
    xorl %ebx,%ebx
    ljmp $(__KERNEL_CS), $0x100000 # jump to the decompressed kernel at 1M

/*
 * We come here, if we were loaded high.
 * We need to move the move-in-place routine down to 0x1000
 * and then start it with the buffer addresses in registers,
 * which we got from the stack.
 */
3:
    movl $move_routine_start,%esi
    movl $0x1000,%edi
    movl $move_routine_end,%ecx
    subl %esi,%ecx
    addl $3,%ecx
    shrl $2,%ecx # round the byte count up to whole 32-bit words
    cld
    rep
    movsl # copy the move routine to 0x1000; the low kernel fragment starts at 0x2000

    popl %esi # discard the address
    popl %ebx # real mode pointer
    popl %esi # low_buffer_start: start address of the low kernel fragment
    popl %ecx # lcount: byte count of the low kernel fragment
    popl %edx # high_buffer_start: start address of the high kernel fragment
    popl %eax # hcount: byte count of the high kernel fragment
    movl $0x100000,%edi # destination: the fragments are merged starting at 1M
    cli # make sure we don't get interrupted
    ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine

/*
 * Routine (template) for moving the decompressed kernel in place,
 * if we were high loaded. This _must_ PIC-code !
 *
 * Register contract, as set up by the caller before jumping here:
 * %esi/%ecx = low fragment start/byte count, %edx/%eax = high fragment
 * start/byte count, %edi = destination (1M), %ebx = real mode pointer.
 */
move_routine_start:
movl %ecx,%ebp
shrl $2,%ecx
rep
movsl # copy the first (low) fragment one 32-bit word at a time
movl %ebp,%ecx
andl $3,%ecx
rep
movsb # copy the remaining 0-3 bytes of the low fragment
movl %edx,%esi
movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0
addl $3,%ecx
shrl $2,%ecx # round the byte count up to whole 32-bit words
rep
movsl # copy the second (high) fragment one 32-bit word at a time
movl %ebx,%esi # Restore setup pointer
xorl %ebx,%ebx
ljmp $(__KERNEL_CS), $0x100000 # jump to the merged kernel at 1M
move_routine_end:

; arch/i386/boot/compressed/misc.c

/*
* gzip declarations
*/

/* Wrapper for function prototypes: OF((args)) expands to (args). */
#define OF(args) args
#define STATIC static

/* This file defines its own memset()/memcpy() further down, so drop any
 * macro versions before they can interfere (presumably from headers). */
#undef memset
#undef memcpy
#define memzero(s, n) memset ((s), 0, (n))

/* The three integer types used by inflate.c. */
typedef unsigned char uch;
typedef unsigned short ush;
typedef unsigned long ulg;

#define WSIZE 0x8000 /* Window size must be at least 32k, */
/* and a power of two */

static uch *inbuf; /* input buffer */
static uch window[WSIZE]; /* Sliding window buffer */

static unsigned insize = 0; /* valid bytes in inbuf */
static unsigned inptr = 0; /* index of next byte to be processed in inbuf */
static unsigned outcnt = 0; /* bytes in output buffer */

/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
#define COMMENT 0x10 /* bit 4 set: file comment present */
#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
#define RESERVED 0xC0 /* bit 6,7: reserved */

#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())

/* Diagnostic functions */
#ifdef DEBUG
# define Assert(cond,msg) {if(!(cond)) error(msg);}
# define Trace(x) fprintf x
# define Tracev(x) {if (verbose) fprintf x ;}
# define Tracevv(x) {if (verbose>1) fprintf x ;}
# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
# define Tracecv(c,x) {if (verbose>1 && (c)) fprintf x ;}
#else
# define Assert(cond,msg)
# define Trace(x)
# define Tracev(x)
# define Tracevv(x)
# define Tracec(c,x)
# define Tracecv(c,x)
#endif

static int fill_inbuf(void);
static void flush_window(void);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);

/*
 * This is set up by the setup-routine at boot-time
 */
static unsigned char *real_mode; /* Pointer to real-mode data */

/* 16-bit field at offset 0x2 of the real-mode data; compared against
 * KB thresholds by the memory checks in the setup_*_buffer functions. */
#define EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
#ifndef STANDARD_MEMORY_BIOS_CALL
/* Alternative memory-size field at offset 0x1e0; the larger of the two
 * values is the one that gets checked. */
#define ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
#endif
/* struct screen_info located at the very start of the real-mode data. */
#define SCREEN_INFO (*(struct screen_info *)(real_mode+0))

/* Compressed input and its length, defined outside this file. */
extern char input_data[];
extern int input_len;

static long bytes_out = 0; /* total number of bytes decompressed so far */
static uch *output_data; /* where decompressed bytes are written */
static unsigned long output_ptr = 0; /* offset into output_data used by flush_window_low() */


/* NOTE(review): error(), gzip_mark() and gzip_release() are already
 * declared a few lines earlier; the repeats here are redundant but harmless. */
static void *malloc(int size);
static void free(void *where);
static void error(char *m);
static void gzip_mark(void **);
static void gzip_release(void **);

static void puts(const char *);

/* Heap for malloc(): starts at the address of `end` and is bounded by
 * free_mem_end_ptr, which the setup_*_buffer functions assign. */
extern int end;
static long free_mem_ptr = (long)&end;
static long free_mem_end_ptr;

#define INPLACE_MOVE_ROUTINE 0x1000 /* address the fragment-merging move routine runs at */
#define LOW_BUFFER_START 0x2000 /* start address of the low decompression fragment */
#define LOW_BUFFER_MAX 0x90000 /* upper limit of the low decompression fragment */
#define HEAP_SIZE 0x3000 /* heap reserved for the decompressor; it begins where BSS ends */
/* End address and size of the low output fragment (high-loaded case only). */
static unsigned int low_buffer_end, low_buffer_size;
/* Nonzero once setup_output_buffer_if_we_run_high() has run; selects
 * flush_window_high() and is the return value of decompress_kernel(). */
static int high_loaded =0;
/* Where output continues after the low fragment is full; assigned in
 * setup_output_buffer_if_we_run_high(). */
static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/;

/* Text-mode video state used by puts()/scroll(); vidmem and vidport are
 * chosen in decompress_kernel() from SCREEN_INFO.orig_video_mode. */
static char *vidmem = (char *)0xb8000;
static int vidport;
static int lines, cols;

#include "../../../../lib/inflate.c"

static void *malloc(int size)
{
void *p;

if (size <0) error("Malloc error ");
if (free_mem_ptr <= 0) error("Memory error ");

free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */

p = (void *)free_mem_ptr;
free_mem_ptr += size;

if (free_mem_ptr >= free_mem_end_ptr)
error(" Out of memory ");

return p;
}

/* No-op: blocks are never returned individually; gzip_release() rewinds
 * free_mem_ptr instead. */
static void free(void *where)
{ /* Don't care */
}

/* Save the current heap position in *ptr so gzip_release() can rewind to it. */
static void gzip_mark(void **ptr)
{
    void *mark = (void *) free_mem_ptr;

    *ptr = mark;
}

/* Rewind the heap to the position previously stored in *ptr by gzip_mark(). */
static void gzip_release(void **ptr)
{
    void *mark = *ptr;

    free_mem_ptr = (long) mark;
}

/*
 * Scroll the text screen up by one line: move lines 1..lines-1 up one row
 * (each cell is 2 bytes) and blank the character bytes of the last row.
 * The regions overlap; this relies on this file's own memcpy(), which
 * copies forward byte by byte.
 */
static void scroll(void)
{
    int i;

    memcpy ( vidmem, vidmem + cols * 2, ( lines - 1 ) * cols * 2 );
    for ( i = ( lines - 1 ) * cols * 2; i < lines * cols * 2; i += 2 )
        vidmem[ i ] = ' ';
}

/*
 * Print a NUL-terminated string directly into text-mode video memory,
 * starting at the cursor position stored in SCREEN_INFO.  A newline moves
 * to the start of the next row; the screen scrolls when the last row is
 * passed.  Afterwards the position is written back to SCREEN_INFO and the
 * hardware cursor is updated through vidport.
 */
static void puts(const char *s)
{
    int x,y,pos;
    char c;

    x = SCREEN_INFO.orig_x;
    y = SCREEN_INFO.orig_y;

    while ( ( c = *s++ ) != '\0' ) {
        if ( c == '\n' ) {
            x = 0;
            if ( ++y >= lines ) {
                scroll();
                y--;
            }
        } else {
            vidmem [ ( x + cols * y ) * 2 ] = c;
            if ( ++x >= cols ) {
                x = 0;
                if ( ++y >= lines ) {
                    scroll();
                    y--;
                }
            }
        }
    }

    SCREEN_INFO.orig_x = x;
    SCREEN_INFO.orig_y = y;

    /* pos is a byte offset (2 bytes per cell), hence the shifts by 9 and 1 */
    pos = (x + cols * y) * 2; /* Update cursor position */
    outb_p(14, vidport);
    outb_p(0xff & (pos >> 9), vidport+1);
    outb_p(15, vidport);
    outb_p(0xff & (pos >> 1), vidport+1);
}

/*
 * Minimal memset for the decompressor: store the byte value c into the
 * first n bytes at s and return s.
 */
void* memset(void* s, int c, size_t n)
{
    size_t i;
    char *ss = (char*)s;

    for (i = 0; i < n; i++)
        ss[i] = c;
    return s;
}

/*
 * Minimal memcpy for the decompressor: copy __n bytes from __src to
 * __dest, front to back, one byte at a time, and return __dest.
 * The index is a size_t so that it matches the type of __n.
 */
void* memcpy(void* __dest, const void* __src,
             size_t __n)
{
    size_t i;
    char *d = (char *)__dest;
    const char *s = (const char *)__src;

    for (i = 0; i < __n; i++)
        d[i] = s[i];
    return __dest;
}

/* ===========================================================================
* Fill the input buffer. This is called only when the buffer is empty
* and at least one byte is really needed.
*/
static int fill_inbuf(void)
{
if (insize != 0) {
error("ran out of input data ");
}

inbuf = input_data;
insize = input_len;
inptr = 1;
return inbuf[0];
}

/* ===========================================================================
 * Copy window[0..outcnt-1] to output_data at offset output_ptr, folding each
 * byte into the running CRC, then advance bytes_out and output_ptr and
 * reset outcnt.  (Used for the decompressed data only.)
 */
static void flush_window_low(void)
{
    ulg running = crc; /* work on a local copy of the CRC */
    uch *dst = output_data + output_ptr;
    unsigned i;

    for (i = 0; i < outcnt; i++) {
        uch byte = window[i];

        dst[i] = byte;
        running = crc_32_tab[((int)running ^ byte) & 0xff] ^ (running >> 8);
    }
    crc = running;
    bytes_out += (ulg)outcnt;
    output_ptr += (ulg)outcnt;
    outcnt = 0;
}

/*
 * Like flush_window_low(), but writes through output_data itself and, as
 * soon as the write pointer reaches low_buffer_end, continues at
 * high_buffer_start.  Updates crc and bytes_out and resets outcnt.
 */
static void flush_window_high(void)
{
    ulg running = crc; /* work on a local copy of the CRC */
    unsigned i;

    for (i = 0; i < outcnt; i++) {
        uch byte = window[i];

        *output_data = byte;
        output_data++;
        /* low buffer full: switch over to the high buffer */
        if ((ulg)output_data == low_buffer_end)
            output_data = high_buffer_start;
        running = crc_32_tab[((int)running ^ byte) & 0xff] ^ (running >> 8);
    }
    crc = running;
    bytes_out += (ulg)outcnt;
    outcnt = 0;
}

/* Dispatch to the low or high flush routine depending on high_loaded. */
static void flush_window(void)
{
    if (!high_loaded) {
        flush_window_low();
        return;
    }
    flush_window_high();
}

/*
 * Fatal error handler: print the message on the console followed by
 * " -- System halted", then spin forever.  Never returns.
 */
static void error(char *x)
{
    puts("\n\n");
    puts(x);
    puts("\n\n -- System halted");

    while(1); /* Halt */
}

/* Number of longs (not bytes) in the decompressor's stack. */
#define STACK_SIZE (4096)

long user_stack [STACK_SIZE];

/*
 * Initial stack pointer and segment, loaded by the `lss` instruction in
 * head.S: a points one element past the end of user_stack, b holds the
 * __KERNEL_DS selector.
 */
struct {
long * a;
short b;
} stack_start = { & user_stack [STACK_SIZE] , __KERNEL_DS };

void setup_normal_output_buffer(void) 对于zImage, 直接解压到1M
{
#ifdef STANDARD_MEMORY_BIOS_CALL
if (EXT_MEM_K < 1024) error("Less than 2MB of memory. ");
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < 1024) error("Less than 2MB of memory. ");
#endif
output_data = (char *)0x100000; /* Points to 1M */
free_mem_end_ptr = (long)real_mode;
}

/*
 * Description of the two output fragments, filled in for the high-loaded
 * case.  head.S reserves 16 bytes for it on the stack and pops the four
 * fields in this order before jumping to the move routine.
 */
struct moveparams {
uch *low_buffer_start; int lcount; /* low fragment: start address, byte count */
uch *high_buffer_start; int hcount; /* high fragment: start address, byte count (0: no move needed) */
};

void setup_output_buffer_if_we_run_high(struct moveparams *mv)
{
high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); 内核高端片段的最小起始地址
#ifdef STANDARD_MEMORY_BIOS_CALL
if (EXT_MEM_K < (3*1024)) error("Less than 4MB of memory. ");
#else
if ((ALT_MEM_K > EXT_MEM_K ? ALT_MEM_K : EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory. ");
#endif
mv->low_buffer_start = output_data = (char *)LOW_BUFFER_START;
low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX
? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff;
low_buffer_size = low_buffer_end - LOW_BUFFER_START;
high_loaded = 1;
free_mem_end_ptr = (long)high_buffer_start;
if ( (0x100000 + low_buffer_size) > ((ulg)high_buffer_start)) {
; 如果高端片段的最小起始地址小于它实际应加载的地址,则将它置为实际地址,
; 这样高端片段就无需再次移动了,否则它要向前移动
high_buffer_start = (uch *)(0x100000 + low_buffer_size);
mv->hcount = 0; /* say: we need not to move high_buffer */
}
else mv->hcount = -1; 待定
mv->high_buffer_start = high_buffer_start;
}

/*
 * After decompression in the high-loaded case: record in mv how many bytes
 * ended up in the low and in the high fragment.  An hcount of 0 set by
 * setup_output_buffer_if_we_run_high() (high fragment already in place) is
 * left untouched.
 */
void close_output_buffer_if_we_run_high(struct moveparams *mv)
{
    if (bytes_out > low_buffer_size) {
        mv->lcount = low_buffer_size;
        if (mv->hcount)
            mv->hcount = bytes_out - low_buffer_size; /* bytes in the high fragment */
    } else {
        /* the whole decompressed kernel fits in the low fragment */
        mv->lcount = bytes_out;
        mv->hcount = 0;
    }
}

int decompress_kernel(struct moveparams *mv, void *rmode)
{
real_mode = rmode;

if (SCREEN_INFO.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
vidmem = (char *) 0xb8000;
vidport = 0x3d4;
}

lines = SCREEN_INFO.orig_video_lines;
cols = SCREEN_INFO.orig_video_cols;

if (free_mem_ptr < 0x100000) setup_normal_output_buffer();
else setup_output_buffer_if_we_run_high(mv);

makecrc();
puts("Uncompressing Linux... ");
gunzip();
puts("Ok, booting the kernel. ");
if (high_loaded) close_output_buffer_if_we_run_high(mv);
return high_loaded;
}

Edited by lucian_yao on 04/28/01 01:36 PM.