Παράδειγμα: επεξεργασία αρχείων καταγραφής πρόσβασης σε ιστοσελίδες

import java.util.*;
import java.util.regex.*;
import java.io.*;

/**
 * Collect and print Web statistics.
 *
 * Reads a Web server access log, counts accesses per host, per hour of day,
 * per request URL and per referrer URL, and prints each set of counts
 * ordered from the most to the least frequent.
 *
 * @author D. Spinellis
 */
class WebStats {

    /**
     * Increment the integer value of map's member by 1.
     * The member is obtained by using the matcher to extract
     * the specified group from the string s.
     * A member that is not yet in the map is added with a count of 1.
     *
     * @param map the counters to update, keyed by member
     * @param s the string the matcher was applied to
     * @param m a matcher holding a successful match against s
     * @param group the number of the capturing group naming the member
     */
    static void increment(Map<String, Integer> map, String s, Matcher m, int group) {
        String member = s.substring(m.start(group), m.end(group));
        Integer count = map.get(member);
        map.put(member, count == null ? 1 : count + 1);
    }

    /**
     * List the contents of the given map, in the map's own iteration order.
     * Each line is printed as "count key", preceded by a blank line and the title.
     *
     * @param title the heading printed above the list
     * @param map the counters to print
     */
    static void list(String title, Map<String, Integer> map) {
        System.out.println("\n" + title);
        for (Map.Entry<String, Integer> e : map.entrySet()) {
            System.out.println(e.getValue() + " " + e.getKey());
        }
    }

    /**
     * List the contents of the given map ordered by their values,
     * largest count first. Entries with the same count are listed
     * in ascending key order, so every entry of the map is printed.
     *
     * @param title the heading printed above the list
     * @param map the counters to print
     */
    static void sortedList(String title, Map<String, Integer> map) {
        System.out.println("\n" + title);
        List<Map.Entry<String, Integer>> valueOrder
            = new ArrayList<Map.Entry<String, Integer>>(map.entrySet());
        Collections.sort(valueOrder, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> a,
                    Map.Entry<String, Integer> b) {
                // Descending by count
                int byCount = b.getValue().compareTo(a.getValue());
                if (byCount != 0) {
                    return byCount;
                }
                // Tie-break on the key: comparing counts alone would make
                // distinct entries with equal counts indistinguishable
                return a.getKey().compareTo(b.getKey());
            }
        });
        for (Map.Entry<String, Integer> e : valueOrder) {
            System.out.println(e.getValue() + " " + e.getKey());
        }
    }



    /**
     * Process the log file named by the single command-line argument.
     * Lines that do not match the expected log format are reported on the
     * standard output and skipped. The program exits with status 1 on a
     * usage error, or when the file cannot be opened or read.
     *
     * @param args the command-line arguments: exactly one log file name
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: WebStats file");
            System.exit(1);
        }

        Pattern cre = null;        // Compiled RE
        try {
            // A standard log line is a line like:
            // 192.168.136.16 - - [26/Jan/2004:19:45:48 +0200]
            //   "GET /c136.html HTTP/1.1" 200 1674 "http://office/c120.html"
            //   "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.5) Gecko/20031007"
            // (all on a single line)
            cre = Pattern.compile(
            "([-\\w.]+)\\s+" +    // 1. Host
            "([-\\w]+)\\s+" +    // 2. Logname
            "([-\\w]+)\\s+" +    // 3. User
            "\\[(\\d+)/" +        // 4. Day of month
            "(\\w+)/" +        // 5. Month
            "(\\d+):" +        // 6. Year
            "(\\d+):" +        // 7. Hour
            "(\\d+)" +        // 8. Minute
            "([^]]+?)\\]\\s+" +    // 9. Rest of time
            "\"([-\\w]+)\\s*" +    // 10. Request verb
            "([^\\s]*)" +        // 11. Request URL
            "([^\"]*?)\"\\s+" +    // 12. Request protocol etc.
            "(\\d+)\\s+" +        // 13. Status
            "([-\\d]+)\\s+" +    // 14. Bytes
            "\"([^\"]*)\"\\s+" +    // 15. Referrer URL
            "\"([^\"]*)\""        // 16. Client
            );
        } catch (PatternSyntaxException e) {
            System.err.println("Invalid RE syntax: " + e.getDescription());
            System.exit(1);
        }

        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(new FileInputStream(args[0])));
        } catch (FileNotFoundException e) {
            // args has exactly one element here, so the file name is args[0]
            System.err.println("Unable to open file " + args[0] + ": " + e.getMessage());
            System.exit(1);
        }

        Map<String, Integer> host = new HashMap<String, Integer>();
        Map<String, Integer> hour = new HashMap<String, Integer>();
        Map<String, Integer> request = new HashMap<String, Integer>();
        Map<String, Integer> referrer = new HashMap<String, Integer>();
        try {
            String s;
            while ((s = in.readLine()) != null) {
                Matcher m = cre.matcher(s);
                if (!m.matches()) {
                    System.out.println("Invalid line: " + s);
                } else {
                    increment(host, s, m, 1);
                    increment(hour, s, m, 7);
                    increment(request, s, m, 11);
                    increment(referrer, s, m, 15);
                }
            }
        } catch (IOException e) {
            System.err.println("Error reading line: " + e.getMessage());
            System.exit(1);
        } finally {
            try {
                in.close();
            } catch (IOException ignored) {
                // All input has been consumed (or reading already failed);
                // a failure to close does not affect the statistics
            }
        }
        sortedList("Host Access Counts", host);
        sortedList("Hourly Access Counts", hour);
        sortedList("Request URL Access Counts", request);
        sortedList("Referrer URL Access Counts", referrer);
    }
}