module libHuffman !------------------------------------------------------------------------------ !Program: Huffman Encoder/Decoder Library !Author: Chris Harper !Date: 5/1/2008 !------------------------------------------------------------------------------ implicit none integer, parameter :: MAX_CODE = 256 !a character frequency data type type ch_data character :: ch integer :: freq real :: rel_freq end type !a tree node character frequency data type type tree_node character :: ch real :: rel_freq type (tree_node), pointer :: left, right end type !a huffman code type type code_data character :: ch character(16) :: code end type contains !this function returns an array of relative character frequencies function build_table (filename) type(ch_data), pointer :: build_table(:) character(256), intent(IN) :: filename type(ch_data), dimension (MAX_CODE) :: ch_freq integer :: total_chars, i, k, code, EOF, table_size character :: ch table_size = 0 !initialize the character frequency array do i = 1, MAX_CODE ch_freq(i)%ch = char(i) ch_freq(i)%freq = 0 end do !need to check for multibyte encoding. right now only works with 1 byte chars open (unit = 10, file = filename, status = "old", form = "unformatted", & access = "direct", recl = 1, err=99) !read the file in one character at a time i = 1 do read (10, rec = i, iostat = EOF) ch if (EOF < 0) then exit else if (EOF > 0) then !if error, print a message and exit print *, "File read error occured. Error Number: ", EOF exit endif code = ichar(ch) if (code > MAX_CODE) then print *, "Character ", ch, " (ASCII ", code, ") greater than max code (", & MAX_CODE, "). Skipping." else ch_freq(code)%freq = ch_freq(code)%freq + 1 total_chars = total_chars + 1 end if i = i + 1 end do !getting the number of distinct characters do i = 1, MAX_CODE if (ch_freq(i)%freq > 0) table_size = table_size + 1 end do allocate(build_table(table_size)) !move only the relevant characters to the returned array !calculating the relative frequency as we go k = 1 do i = 1, MAX_CODE if (ch_freq(i)%freq > 0) then build_table(k)%ch = ch_freq(i)%ch build_table(k)%freq = ch_freq(i)%freq build_table(k)%rel_freq = real(ch_freq(i)%freq) / real(total_chars) !print *, build_table(k)%ch, build_table(k)%freq, build_table(k)%rel_freq k = k + 1 end if end do return 99 continue print *, "Error Opening File" end function !the function builds a binary tree and then uses it to create huffman codes function build_tree (ch_rel_freq) result (code_table) type(ch_data) :: ch_rel_freq(:) type(code_data), pointer :: code_table(:) type retarded_pointer type(tree_node), pointer :: ptr end type type(retarded_pointer) :: search_table(size(ch_rel_freq)+(size(ch_rel_freq)-1)) type(tree_node), pointer :: root integer :: node1, node2 integer :: i, j, k, next_free_node !build an array of pointers to all the possible nodes in the tree !used for easier searching do i = 1, size(ch_rel_freq) allocate(search_table(i)%ptr) search_table(i)%ptr%ch = ch_rel_freq(i)%ch search_table(i)%ptr%rel_freq = ch_rel_freq(i)%rel_freq nullify (search_table(i)%ptr%left) nullify (search_table(i)%ptr%right) end do next_free_node = size(ch_rel_freq) + 1 !loop through the search table to find the two lowest frequency nodes !then point a free node at them and sum frequencies node1 = 0 node2 = 0 do i = 1, size(ch_rel_freq)-1 do j = 1, size(search_table) if (associated(search_table(j)%ptr)) then !set the first two nodes we come across to the lowest freq if (node1 == 0) then node1 = j goto 110 else if (node2 == 0) then node2 = j goto 110 end if !compare lowest freq nodes to the rest to find actual lowest freq if (search_table(j)%ptr%rel_freq < search_table(node1)%ptr%rel_freq) then node1 = j else if (search_table(j)%ptr%rel_freq < search_table(node2)%ptr%rel_freq) then node2 = j end if 110 continue end if end do !now that we have the lowest two frequency nodes, !get the next free node to point to them allocate(search_table(next_free_node)%ptr) search_table(next_free_node)%ptr%ch = "*" search_table(next_free_node)%ptr%rel_freq = search_table(node1)%ptr%rel_freq & + search_table(node2)%ptr%rel_freq search_table(next_free_node)%ptr%left => search_table(node1)%ptr search_table(next_free_node)%ptr%right => search_table(node2)%ptr !debug line print *, "Creating node from ", search_table(node1)%ptr%ch, " (", & search_table(node1)%ptr%rel_freq, ") and ", search_table(node2)%ptr%ch, & " (", search_table(node2)%ptr%rel_freq, ")" !clean up nullify(search_table(node1)%ptr) nullify(search_table(node2)%ptr) node1 = 0 node2 = 0 next_free_node = next_free_node + 1 end do root => search_table(next_free_node-1)%ptr !allocate the final code table allocate(code_table(MAX_CODE)) do i = 1, MAX_CODE code_table(i)%ch = char(0) end do call LRcode(root, repeat(" ", 20), code_table) end function !sub to traverse a binary tree to build a huffman code table recursive subroutine LRcode(node, code, code_table) type(tree_node) :: node type(code_data), pointer :: code_table(:) character(16) :: code, new_code integer :: i if ((.NOT. associated(node%left)) .AND. (.NOT. associated(node%right))) then code_table(ichar(node%ch))%ch = node%ch code_table(ichar(node%ch))%code = code else if (associated(node%left)) then new_code = trim(code) // "0" call LRcode(node%left, new_code, code_table) end if if (associated(node%right)) then new_code = trim(code) // "1" call LRcode(node%right, new_code, code_table) end if end if end subroutine !encodes a given file using a huffman code table subroutine encode(filename, code_table) character(256), intent(IN) :: filename type(code_data), pointer :: code_table(:) integer :: i, EOF character :: ch !output file open(unit = 20, file = "encoded.txt", action = "write", status = "unknown", & position = "rewind") !input file open (unit = 10, file = filename, status = "old", form = "unformatted", & access = "direct", recl = 1) i = 1 do read (10, rec = i, iostat = EOF) ch if (EOF < 0) then exit else if (EOF > 0) then !if error, print a message and exit print *, "File read error occured. Error Number: ", EOF exit endif !write coded characters if (ichar(ch) <= MAX_CODE) then write(unit = 20, fmt = '(a)', advance = "no") trim(code_table(ichar(ch))%code) end if i = i + 1 end do close(10) close(20) end subroutine !decodes the selected file using the selected code file subroutine decode(filename, code_filename) character(256) :: filename, code_filename type(code_data), pointer :: code_table(:) integer :: i, ERR, EOF, num_codes, char_code character :: ch !find the number of codes num_codes = 0 open(unit = 10, file = code_filename, status = "old", iostat = ERR) if (ERR == 0) then do read(10, "(A1)", iostat = EOF) ch if (EOF < 0) then exit else if (EOF > 0) then print *, "File read error occured. Error Number: ", EOF stop endif num_codes = num_codes + 1 end do close(10) else print *, "File open error occured. Error Number: ", ERR stop end if !then get the huffman codes if (num_codes > 0) then !alloc the table array allocate(code_table(num_codes)) open(unit = 11, file = code_filename, status = "old") do i = 1, size(code_table) read(11, "(A1, 2X, I3, 3X, A16)") ch, char_code, code_table(i)%code code_table(i)%ch = char(char_code) print "(A1, 3X, A16)", code_table(i)%ch, code_table(i)%code end do close(11) else stop end if !the actual decoding would go here return 99 continue print *, "Error Opening File: ", filename end subroutine end module