/**********************************************************************************
*
* Copyright (C) 2006 OhioLINK
*
* This file is part of the OhioLINK Digital Resource Commons (DRC) Project.
*
* The OhioLINK DRC is free software; you can redistribute it and/or
* modify it under the terms of the Affero General Public License as
* published by Affero, Inc. -- either version 1 of the License, or
* (at your option) any later version.
*
* The OhioLINK DRC Project is distributed in the hope that it will be
* useful, but WITHOUT ANY WARRANTY -- without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Affero General Public License for more details.
*
* You should have received a copy of the Affero General Public
* License in the LICENSE.txt file that comes with the DRC project;
* if not, write to DRC Development Team, OhioLINK, 2455 North Star Rd,
* Suite 300, Columbus, OH 43221, USA.
*********************************************************************************/
package batch;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.MalformedURLException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import fedora.client.FedoraClient;
import fedora.server.access.FedoraAPIA;
import fedora.server.management.FedoraAPIM;
import fedora.server.types.gen.DatastreamDef;
import fedora.server.types.gen.MIMETypedStream;
/**
 * One-off batch job that rebuilds the Dublin Core ("DC") datastream of FEDORA
 * objects from the metadata held in their ETD XML datastream.
 *
 * <p>For each PID in the hard-coded range, the job lists the object's
 * datastreams, picks every datastream whose identifier ends in "etd", parses
 * it as XML, copies selected fields into a new {@code oai_dc:dc} document and
 * writes that document back to the object's "DC" datastream.
 */
public class Batch {

    /** Namespace URI of the {@code oai_dc:dc} container element. */
    private static final String OAI_DC_NS = "http://www.openarchives.org/OAI/2.0/oai_dc/";

    /** Namespace URI of the individual {@code dc:*} elements. */
    private static final String DC_NS = "http://purl.org/dc/elements/1.1/";

    /** Namespace URI that {@code xmlns:*} declaration attributes must live in. */
    private static final String XMLNS_NS = "http://www.w3.org/2000/xmlns/";

    /** ETD elements that together make up the author's name, in display order. */
    private static final String[] AUTHOR_NAME_FIELDS =
            { "authfname", "authmname", "authlname", "authsuffix" };

    /**
     * Runs the batch. Failures are reported per PID on standard output and do
     * not stop the processing of subsequent PIDs.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (int i = 80; i < 81; i++) {
            // "hdl" is our FEDORA PID prefix
            String pid = "hdl:" + i;
            try {
                FedoraClient client = new FedoraClient(
                        "http://fedora.server/fedora",
                        "fedoraAdmin", "password");
                FedoraAPIA apia = client.getAPIA();
                FedoraAPIM apim = client.getAPIM();

                // Look through the object's datastreams for identifiers ending in "etd".
                DatastreamDef[] datastreams = apia.listDatastreams(pid, null);
                for (DatastreamDef def : datastreams) {
                    String itemId = def.getID();
                    if (!itemId.endsWith("etd")) {
                        continue;
                    }

                    // Fetch the ETD XML and derive the Dublin Core record from it.
                    MIMETypedStream ds = apia.getDatastreamDissemination(pid, itemId, null);
                    byte[] dublinCore = buildDublinCore(pid, ds.getStream());

                    // Lastly, write the modified DC datastream back to the FEDORA server
                    apim.modifyDatastreamByValue(pid, "DC", null, "Dublin Core", false,
                            "text/xml", null, dublinCore, "A",
                            "Batch program to add DC datastream from ETD XML file", false);
                }
            } catch (Exception e) {
                // Some exceptions (e.g. NullPointerException) carry no message;
                // fall back to the exception itself so the log line is never "pid null".
                String message = e.getLocalizedMessage();
                System.out.println(pid + " " + (message != null ? message : e.toString()));
            }
        }
    }

    /**
     * Builds the serialized {@code oai_dc:dc} record for one object.
     *
     * @param pid    the object's PID, emitted as {@code dc:identifier}
     * @param etdXml raw bytes of the ETD XML datastream
     * @return the Dublin Core document, UTF-8 encoded, without an XML declaration
     * @throws IllegalStateException if a required ETD element is absent
     * @throws Exception if the ETD XML cannot be parsed or the result cannot be serialized
     */
    private static byte[] buildDublinCore(String pid, byte[] etdXml) throws Exception {
        // NOTE(review): the parser runs with default settings (DTDs and external
        // entities allowed). If ETD datastreams can originate from untrusted
        // submitters, consider hardening the factory - confirm before changing,
        // since existing documents may rely on a DOCTYPE.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(true);
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document sourceDoc = builder.parse(new ByteArrayInputStream(etdXml));

        // Empty Dublin Core document with both namespace prefixes declared on the root.
        Document destDoc = builder.newDocument();
        Element rootElement = destDoc.createElementNS(OAI_DC_NS, "oai_dc:dc");
        rootElement.setAttributeNS(XMLNS_NS, "xmlns:oai_dc", OAI_DC_NS);
        rootElement.setAttributeNS(XMLNS_NS, "xmlns:dc", DC_NS);
        destDoc.appendChild(rootElement);

        // Copy the values from the ETD XML document into the DC XML document.
        appendDcElement(rootElement, "dc:identifier", pid);
        appendDcElement(rootElement, "dc:title", requiredText(sourceDoc, "title"));
        appendDcElement(rootElement, "dc:creator", authorName(sourceDoc));
        appendDcElement(rootElement, "dc:language", requiredText(sourceDoc, "language"));
        appendDcElement(rootElement, "dc:description", requiredText(sourceDoc, "abstract"));
        appendDcElement(rootElement, "dc:date", requiredText(sourceDoc, "docyear"));
        appendDcElement(rootElement, "dc:subject", requiredText(sourceDoc, "subjects"));

        // Use an identity Transformer to serialize the DOM tree.
        TransformerFactory tFactory = TransformerFactory.newInstance();
        Transformer transformer = tFactory.newTransformer();
        transformer.setOutputProperty(javax.xml.transform.OutputKeys.OMIT_XML_DECLARATION, "yes");
        StringWriter strWriter = new StringWriter();
        transformer.transform(new DOMSource(destDoc), new StreamResult(strWriter));
        return strWriter.toString().getBytes("UTF-8");
    }

    /**
     * Appends a namespace-qualified Dublin Core element holding {@code text}.
     * The element is created with {@code createElementNS} so that it carries
     * the DC namespace URI, matching the namespace-aware root element.
     */
    private static void appendDcElement(Element parent, String qualifiedName, String text) {
        Document owner = parent.getOwnerDocument();
        Element child = owner.createElementNS(DC_NS, qualifiedName);
        child.appendChild(owner.createTextNode(text));
        parent.appendChild(child);
    }

    /**
     * Joins the author's name parts with single spaces. Parts that are absent
     * from the ETD document or empty after normalization are skipped.
     */
    private static String authorName(Document sourceDoc) {
        StringBuilder author = new StringBuilder();
        for (String field : AUTHOR_NAME_FIELDS) {
            String part = textOf(sourceDoc, field);
            if (part != null && part.length() > 0) {
                if (author.length() > 0) {
                    author.append(' ');
                }
                author.append(part);
            }
        }
        return author.toString();
    }

    /**
     * Returns the normalized text of the first {@code tagName} element, failing
     * with a message that names the element when it is absent.
     */
    private static String requiredText(Document sourceDoc, String tagName) {
        String text = textOf(sourceDoc, tagName);
        if (text == null) {
            throw new IllegalStateException("ETD XML has no <" + tagName + "> element");
        }
        return text;
    }

    /**
     * Returns the normalized text of the first {@code tagName} element, or
     * {@code null} when the document contains no such element.
     */
    private static String textOf(Document sourceDoc, String tagName) {
        // getElementsByTagName only yields elements, so the cast is safe;
        // item(0) is null when there is no match.
        Element first = (Element) sourceDoc.getElementsByTagName(tagName).item(0);
        return first == null ? null : normalizeWhitespace(first.getTextContent());
    }

    /**
     * Replaces each line break (with surrounding tabs/spaces) by one space,
     * collapses runs of tabs/spaces to one space, and trims the result.
     */
    private static String normalizeWhitespace(String text) {
        return text.replaceAll("[\t ]*\n[\t ]*", " ").replaceAll("[\t ][\t ]+", " ").trim();
    }
}